--- title: Preliminary imports keywords: fastai sidebar: home_sidebar nb_path: "worlds.ipynb" ---
# Detect whether we are running inside Google Colab: the `google.colab`
# package only exists in a Colab runtime.
try:
    import google.colab
    IN_COLAB = True
except ImportError:
    # Narrowed from a bare `except:` — only a missing module means
    # "not Colab"; a bare except would also hide e.g. KeyboardInterrupt.
    IN_COLAB = False
# True only when this notebook/script is executed as the main program
# (guards the Colab-only setup below against import-time side effects).
IN_MAIN = __name__ == '__main__'
if IN_COLAB and IN_MAIN:
    # Project root on the mounted Google Drive.
    home_dir = '/content/drive/MyDrive/Colab Notebooks/Ecosystems/v3'
if IN_COLAB and IN_MAIN:
    # Mount Google Drive and make the project directory importable.
    from google.colab import drive
    drive.mount('/content/drive')
    import sys
    sys.path.append(home_dir)
    # IPython magic: change the working directory to the project root.
    %cd $home_dir
if IN_COLAB and IN_MAIN:
    # Shell escapes (Colab only): quietly install runtime dependencies.
    # NOTE(review): the leading `cd;` appears to be a no-op before each
    # command — presumably a leftover; verify it is intentional.
    !cd;pip -q install import-ipynb
    !cd;pip -q install stable-baselines3[extra]
    !cd;apt install swig
    !cd;pip -q install box2d box2d-kengz
#verbose = 0
import json
import random
from IPython.display import HTML
import gym
from gym import spaces
# This has to be imported before our own notebook imports.
#import import_ipynb
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import animation
import numpy as np
import pandas as pd
import seaborn as sns
from ecotwins.animal_classes import Ecosystem, MultiSheep, SimpleSheep, Terrain
# import ecoenv
from ecotwins.ecoenv import EcoEnv
# from perception import Perception
from stable_baselines3 import PPO, A2C, SAC, DDPG, TD3 # , DQN
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
# from utility import distance, draw_objects, motion_diagram, normalize
from ecotwins.animation_helper import AnimationHelper
from ecotwins.reflex_agent import ReflexAgent
from ecotwins.animal_classes import Ecosystem, MultiSheep, SimpleSheep
# --- Build the ecosystem, train a PPO agent on it, and evaluate it. ---

# Terrain containing a single 'dandelion' food object.
t = Terrain(objects={'dandelion': 1})
# t = Terrain(objects={'dandelion': (np.random.random((100,2)) - 0.5) * 20})

# Agent hyperparameters (semantics defined in SimpleSheep — TODO confirm):
#   max_age: presumably the episode length in steps,
#   delta:   presumably the agent's step size,
#   close:   presumably the "reached an object" distance threshold,
#   gamma:   discount factor.
hyperparameters = {'max_age': 2000, 'delta': 0.2, 'close': 5, 'gamma': 0.9}
agent = SimpleSheep(distances={'dandelion': 28}, hyperparameters=hyperparameters)
eco = Ecosystem(t, agent)
env = EcoEnv(eco)

# Create the model.
model = PPO('MlpPolicy', env, verbose=1)
# model.set_env(env)

# Train the model. `total_timesteps` must be an int: the original passed
# the float 2e5, which newer stable-baselines3 releases reject.
model.learn(total_timesteps=200_000)

# Evaluate the model.
# NOTE: If you use wrappers with your environment that modify rewards,
# this will be reflected here. To evaluate with original rewards,
# wrap environment in a "Monitor" wrapper before other wrappers.
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print(f'Mean reward: {mean_reward}, std. dev.: {std_reward}')
# Save the model
#model.save("ecosystem")
#del model # delete trained model to demonstrate loading
# Load the trained model
#model = DQN.load("ecosystem", env=env)
# Enjoy trained model
# obs = env.reset() # Generate a new map? Returns an initial observation
# trace = [env.position.copy()]
# total_reward = 0
# # for i in range(ecoenv.TRACE_LENGTH): # Take a walk of length ecoenv.TRACE_LENGTH (not EPISODE_LENGTH as in training)
# for i in range(2000):
# action, _states = model.predict(obs, deterministic=True) # Select action
# obs, reward, dones, info = env.step(action) # Compute consequences
# assert(reward >= 0)
# total_reward += reward
# trace.append(env.position.copy())
# trace = np.array(trace)
# env.render(trace) # Show walk
# plt.title(f'Total reward: {total_reward}');
/home/niklas/miniconda3/envs/gym/lib/python3.9/site-packages/torch/cuda/__init__.py:52: UserWarning: CUDA initialization: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero. (Triggered internally at /opt/conda/conda-bld/pytorch_1616554798336/work/c10/cuda/CUDAFunctions.cpp:109.) return torch._C._cuda_getDeviceCount() > 0
Using cpu device Wrapping the env with a `Monitor` wrapper Wrapping the env in a DummyVecEnv. Reset@0, accumulated reward: 0.00, Interoception levels: energy:3.00, water:2.00 happiness: 0.00 Reset@2001, accumulated reward: 0.00, Interoception levels: energy:3.00, water:2.00 happiness: 0.00 --------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 0 | | time/ | | | fps | 1001 | | iterations | 1 | | time_elapsed | 2 | | total_timesteps | 2048 | --------------------------------- Reset@2001, accumulated reward: 1.00, Interoception levels: energy:3.00, water:2.00 happiness: 2.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 0.5 | | time/ | | | fps | 719 | | iterations | 2 | | time_elapsed | 5 | | total_timesteps | 4096 | | train/ | | | approx_kl | 0.019854717 | | clip_fraction | 0.222 | | clip_range | 0.2 | | entropy_loss | -2.82 | | explained_variance | 0.766 | | learning_rate | 0.0003 | | loss | -0.000985 | | n_updates | 10 | | policy_gradient_loss | -0.0261 | | std | 0.978 | | value_loss | 2.59e-05 | ----------------------------------------- Reset@2001, accumulated reward: 1.00, Interoception levels: energy:3.00, water:2.00 happiness: 1.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 0.667 | | time/ | | | fps | 688 | | iterations | 3 | | time_elapsed | 8 | | total_timesteps | 6144 | | train/ | | | approx_kl | 0.007154006 | | clip_fraction | 0.0597 | | clip_range | 0.2 | | entropy_loss | -2.77 | | explained_variance | 0.00349 | | learning_rate | 0.0003 | | loss | 0.00258 | | n_updates | 20 | | policy_gradient_loss | -0.003 | | std | 0.958 | | value_loss | 0.00463 | ----------------------------------------- Reset@2001, accumulated reward: 1.00, Interoception levels: energy:3.00, water:2.00 happiness: 2.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 0.75 | | time/ | | | fps | 665 | | 
iterations | 4 | | time_elapsed | 12 | | total_timesteps | 8192 | | train/ | | | approx_kl | 0.002791753 | | clip_fraction | 0.0238 | | clip_range | 0.2 | | entropy_loss | -2.71 | | explained_variance | -0.0664 | | learning_rate | 0.0003 | | loss | -0.0094 | | n_updates | 30 | | policy_gradient_loss | -0.00281 | | std | 0.921 | | value_loss | 0.00421 | ----------------------------------------- Reset@2001, accumulated reward: 2.00, Interoception levels: energy:3.00, water:2.00 happiness: 2.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 1 | | time/ | | | fps | 648 | | iterations | 5 | | time_elapsed | 15 | | total_timesteps | 10240 | | train/ | | | approx_kl | 0.009861134 | | clip_fraction | 0.0804 | | clip_range | 0.2 | | entropy_loss | -2.69 | | explained_variance | 0.921 | | learning_rate | 0.0003 | | loss | -0.0183 | | n_updates | 40 | | policy_gradient_loss | -0.00923 | | std | 0.934 | | value_loss | 8.36e-06 | ----------------------------------------- Reset@2001, accumulated reward: 10.00, Interoception levels: energy:3.00, water:2.00 happiness: 10.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 2.5 | | time/ | | | fps | 643 | | iterations | 6 | | time_elapsed | 19 | | total_timesteps | 12288 | | train/ | | | approx_kl | 0.007467295 | | clip_fraction | 0.0724 | | clip_range | 0.2 | | entropy_loss | -2.68 | | explained_variance | 0.205 | | learning_rate | 0.0003 | | loss | -0.00737 | | n_updates | 50 | | policy_gradient_loss | -0.00673 | | std | 0.917 | | value_loss | 0.00771 | ----------------------------------------- Reset@2001, accumulated reward: 19.00, Interoception levels: energy:3.00, water:2.00 happiness: 20.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 4.86 | | time/ | | | fps | 638 | | iterations | 7 | | time_elapsed | 22 | | total_timesteps | 14336 | | train/ | | | approx_kl | 0.0042496724 | | 
clip_fraction | 0.0782 | | clip_range | 0.2 | | entropy_loss | -2.64 | | explained_variance | 0.0249 | | learning_rate | 0.0003 | | loss | 0.00388 | | n_updates | 60 | | policy_gradient_loss | -0.0049 | | std | 0.897 | | value_loss | 0.0661 | ------------------------------------------ Reset@2001, accumulated reward: 32.00, Interoception levels: energy:3.00, water:2.00 happiness: 32.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 8.25 | | time/ | | | fps | 636 | | iterations | 8 | | time_elapsed | 25 | | total_timesteps | 16384 | | train/ | | | approx_kl | 0.0072333654 | | clip_fraction | 0.0717 | | clip_range | 0.2 | | entropy_loss | -2.61 | | explained_variance | 0.0671 | | learning_rate | 0.0003 | | loss | 0.012 | | n_updates | 70 | | policy_gradient_loss | -0.00445 | | std | 0.885 | | value_loss | 0.0843 | ------------------------------------------ Reset@2001, accumulated reward: 27.00, Interoception levels: energy:3.00, water:2.00 happiness: 27.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 10.3 | | time/ | | | fps | 633 | | iterations | 9 | | time_elapsed | 29 | | total_timesteps | 18432 | | train/ | | | approx_kl | 0.009428272 | | clip_fraction | 0.0826 | | clip_range | 0.2 | | entropy_loss | -2.58 | | explained_variance | 0.029 | | learning_rate | 0.0003 | | loss | 0.038 | | n_updates | 80 | | policy_gradient_loss | -0.00626 | | std | 0.874 | | value_loss | 0.203 | ----------------------------------------- Reset@2001, accumulated reward: 33.00, Interoception levels: energy:3.00, water:2.00 happiness: 33.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 12.6 | | time/ | | | fps | 628 | | iterations | 10 | | time_elapsed | 32 | | total_timesteps | 20480 | | train/ | | | approx_kl | 0.008150881 | | clip_fraction | 0.0685 | | clip_range | 0.2 | | entropy_loss | -2.55 | | explained_variance | 0.0869 | | 
learning_rate | 0.0003 | | loss | 0.0233 | | n_updates | 90 | | policy_gradient_loss | -0.00352 | | std | 0.858 | | value_loss | 0.125 | ----------------------------------------- Reset@2001, accumulated reward: 48.00, Interoception levels: energy:3.00, water:2.00 happiness: 48.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 15.8 | | time/ | | | fps | 617 | | iterations | 11 | | time_elapsed | 36 | | total_timesteps | 22528 | | train/ | | | approx_kl | 0.004007942 | | clip_fraction | 0.0792 | | clip_range | 0.2 | | entropy_loss | -2.52 | | explained_variance | 0.0866 | | learning_rate | 0.0003 | | loss | 0.0647 | | n_updates | 100 | | policy_gradient_loss | -0.00605 | | std | 0.847 | | value_loss | 0.158 | ----------------------------------------- Reset@2001, accumulated reward: 39.00, Interoception levels: energy:3.00, water:2.00 happiness: 39.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 17.8 | | time/ | | | fps | 607 | | iterations | 12 | | time_elapsed | 40 | | total_timesteps | 24576 | | train/ | | | approx_kl | 0.008357996 | | clip_fraction | 0.0697 | | clip_range | 0.2 | | entropy_loss | -2.5 | | explained_variance | 0.0837 | | learning_rate | 0.0003 | | loss | 0.116 | | n_updates | 110 | | policy_gradient_loss | -0.00541 | | std | 0.839 | | value_loss | 0.24 | ----------------------------------------- Reset@2001, accumulated reward: 44.00, Interoception levels: energy:3.00, water:2.00 happiness: 44.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 19.8 | | time/ | | | fps | 607 | | iterations | 13 | | time_elapsed | 43 | | total_timesteps | 26624 | | train/ | | | approx_kl | 0.0080226455 | | clip_fraction | 0.0564 | | clip_range | 0.2 | | entropy_loss | -2.47 | | explained_variance | -0.0042 | | learning_rate | 0.0003 | | loss | 0.0821 | | n_updates | 120 | | policy_gradient_loss | -0.00428 | | std | 
0.828 | | value_loss | 0.124 | ------------------------------------------ Reset@2001, accumulated reward: 63.00, Interoception levels: energy:3.00, water:2.00 happiness: 63.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 22.9 | | time/ | | | fps | 609 | | iterations | 14 | | time_elapsed | 47 | | total_timesteps | 28672 | | train/ | | | approx_kl | 0.0055861357 | | clip_fraction | 0.0681 | | clip_range | 0.2 | | entropy_loss | -2.45 | | explained_variance | 0.0111 | | learning_rate | 0.0003 | | loss | 0.0919 | | n_updates | 130 | | policy_gradient_loss | -0.00517 | | std | 0.823 | | value_loss | 0.21 | ------------------------------------------ Reset@2001, accumulated reward: 57.00, Interoception levels: energy:3.00, water:2.00 happiness: 58.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 25.1 | | time/ | | | fps | 607 | | iterations | 15 | | time_elapsed | 50 | | total_timesteps | 30720 | | train/ | | | approx_kl | 0.0039899433 | | clip_fraction | 0.0729 | | clip_range | 0.2 | | entropy_loss | -2.44 | | explained_variance | 0.002 | | learning_rate | 0.0003 | | loss | 0.277 | | n_updates | 140 | | policy_gradient_loss | -0.00439 | | std | 0.817 | | value_loss | 0.402 | ------------------------------------------ Reset@2001, accumulated reward: 47.00, Interoception levels: energy:3.00, water:2.00 happiness: 47.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 26.5 | | time/ | | | fps | 605 | | iterations | 16 | | time_elapsed | 54 | | total_timesteps | 32768 | | train/ | | | approx_kl | 0.0037600603 | | clip_fraction | 0.0231 | | clip_range | 0.2 | | entropy_loss | -2.42 | | explained_variance | 0.0195 | | learning_rate | 0.0003 | | loss | 0.115 | | n_updates | 150 | | policy_gradient_loss | -0.00157 | | std | 0.804 | | value_loss | 0.239 | ------------------------------------------ Reset@2001, accumulated 
reward: 64.00, Interoception levels: energy:3.00, water:2.00 happiness: 64.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 28.7 | | time/ | | | fps | 605 | | iterations | 17 | | time_elapsed | 57 | | total_timesteps | 34816 | | train/ | | | approx_kl | 0.0034078541 | | clip_fraction | 0.0397 | | clip_range | 0.2 | | entropy_loss | -2.39 | | explained_variance | 0.0705 | | learning_rate | 0.0003 | | loss | 0.104 | | n_updates | 160 | | policy_gradient_loss | -0.00153 | | std | 0.793 | | value_loss | 0.252 | ------------------------------------------ Reset@2001, accumulated reward: 70.00, Interoception levels: energy:3.00, water:2.00 happiness: 71.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 31 | | time/ | | | fps | 606 | | iterations | 18 | | time_elapsed | 60 | | total_timesteps | 36864 | | train/ | | | approx_kl | 0.009088224 | | clip_fraction | 0.0666 | | clip_range | 0.2 | | entropy_loss | -2.36 | | explained_variance | 0.0283 | | learning_rate | 0.0003 | | loss | 0.108 | | n_updates | 170 | | policy_gradient_loss | -0.00355 | | std | 0.785 | | value_loss | 0.291 | ----------------------------------------- Reset@2001, accumulated reward: 64.00, Interoception levels: energy:3.00, water:2.00 happiness: 64.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 32.7 | | time/ | | | fps | 607 | | iterations | 19 | | time_elapsed | 64 | | total_timesteps | 38912 | | train/ | | | approx_kl | 0.0062267473 | | clip_fraction | 0.0261 | | clip_range | 0.2 | | entropy_loss | -2.36 | | explained_variance | -0.0142 | | learning_rate | 0.0003 | | loss | 0.152 | | n_updates | 180 | | policy_gradient_loss | -0.0013 | | std | 0.787 | | value_loss | 0.289 | ------------------------------------------ Reset@2001, accumulated reward: 72.00, Interoception levels: energy:3.00, water:2.00 happiness: 72.00 
------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 34.7 | | time/ | | | fps | 608 | | iterations | 20 | | time_elapsed | 67 | | total_timesteps | 40960 | | train/ | | | approx_kl | 0.0032257212 | | clip_fraction | 0.0517 | | clip_range | 0.2 | | entropy_loss | -2.35 | | explained_variance | 0.042 | | learning_rate | 0.0003 | | loss | 0.212 | | n_updates | 190 | | policy_gradient_loss | -0.00282 | | std | 0.783 | | value_loss | 0.346 | ------------------------------------------ Reset@2001, accumulated reward: 71.00, Interoception levels: energy:3.00, water:2.00 happiness: 72.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 36.4 | | time/ | | | fps | 606 | | iterations | 21 | | time_elapsed | 70 | | total_timesteps | 43008 | | train/ | | | approx_kl | 0.0016226672 | | clip_fraction | 0.0544 | | clip_range | 0.2 | | entropy_loss | -2.33 | | explained_variance | 0.0229 | | learning_rate | 0.0003 | | loss | 0.155 | | n_updates | 200 | | policy_gradient_loss | -0.00328 | | std | 0.772 | | value_loss | 0.235 | ------------------------------------------ Reset@2001, accumulated reward: 65.00, Interoception levels: energy:3.00, water:2.00 happiness: 66.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 37.7 | | time/ | | | fps | 605 | | iterations | 22 | | time_elapsed | 74 | | total_timesteps | 45056 | | train/ | | | approx_kl | 0.004408701 | | clip_fraction | 0.0374 | | clip_range | 0.2 | | entropy_loss | -2.31 | | explained_variance | -0.0432 | | learning_rate | 0.0003 | | loss | 0.233 | | n_updates | 210 | | policy_gradient_loss | -0.0012 | | std | 0.764 | | value_loss | 0.367 | ----------------------------------------- Reset@2001, accumulated reward: 54.00, Interoception levels: energy:3.00, water:2.00 happiness: 54.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 38.4 | | 
time/ | | | fps | 603 | | iterations | 23 | | time_elapsed | 78 | | total_timesteps | 47104 | | train/ | | | approx_kl | 0.005860232 | | clip_fraction | 0.0218 | | clip_range | 0.2 | | entropy_loss | -2.28 | | explained_variance | 0.0468 | | learning_rate | 0.0003 | | loss | 0.162 | | n_updates | 220 | | policy_gradient_loss | -0.000664 | | std | 0.751 | | value_loss | 0.26 | ----------------------------------------- Reset@2001, accumulated reward: 66.00, Interoception levels: energy:3.00, water:2.00 happiness: 66.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 39.6 | | time/ | | | fps | 600 | | iterations | 24 | | time_elapsed | 81 | | total_timesteps | 49152 | | train/ | | | approx_kl | 0.0015669339 | | clip_fraction | 0.0298 | | clip_range | 0.2 | | entropy_loss | -2.26 | | explained_variance | 0.0075 | | learning_rate | 0.0003 | | loss | 0.21 | | n_updates | 230 | | policy_gradient_loss | -0.00142 | | std | 0.746 | | value_loss | 0.265 | ------------------------------------------ Reset@2001, accumulated reward: 73.00, Interoception levels: energy:3.00, water:2.00 happiness: 73.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 40.9 | | time/ | | | fps | 596 | | iterations | 25 | | time_elapsed | 85 | | total_timesteps | 51200 | | train/ | | | approx_kl | 0.0026827762 | | clip_fraction | 0.0351 | | clip_range | 0.2 | | entropy_loss | -2.25 | | explained_variance | -0.0277 | | learning_rate | 0.0003 | | loss | 0.121 | | n_updates | 240 | | policy_gradient_loss | -0.000961 | | std | 0.743 | | value_loss | 0.356 | ------------------------------------------ Reset@2001, accumulated reward: 75.00, Interoception levels: energy:3.00, water:2.00 happiness: 76.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 42.2 | | time/ | | | fps | 597 | | iterations | 26 | | time_elapsed | 89 | | total_timesteps | 53248 | | train/ | 
| | approx_kl | 0.0042852117 | | clip_fraction | 0.0361 | | clip_range | 0.2 | | entropy_loss | -2.24 | | explained_variance | -0.00488 | | learning_rate | 0.0003 | | loss | 0.324 | | n_updates | 250 | | policy_gradient_loss | -0.00246 | | std | 0.738 | | value_loss | 0.344 | ------------------------------------------ Reset@2001, accumulated reward: 78.00, Interoception levels: energy:3.00, water:2.00 happiness: 78.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 43.6 | | time/ | | | fps | 596 | | iterations | 27 | | time_elapsed | 92 | | total_timesteps | 55296 | | train/ | | | approx_kl | 0.005147939 | | clip_fraction | 0.0291 | | clip_range | 0.2 | | entropy_loss | -2.23 | | explained_variance | -0.0088 | | learning_rate | 0.0003 | | loss | 0.0796 | | n_updates | 260 | | policy_gradient_loss | -0.00105 | | std | 0.736 | | value_loss | 0.314 | ----------------------------------------- Reset@2001, accumulated reward: 80.00, Interoception levels: energy:3.00, water:2.00 happiness: 80.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 44.9 | | time/ | | | fps | 597 | | iterations | 28 | | time_elapsed | 96 | | total_timesteps | 57344 | | train/ | | | approx_kl | 0.009069825 | | clip_fraction | 0.0489 | | clip_range | 0.2 | | entropy_loss | -2.23 | | explained_variance | 0.00459 | | learning_rate | 0.0003 | | loss | 0.13 | | n_updates | 270 | | policy_gradient_loss | -0.00184 | | std | 0.736 | | value_loss | 0.373 | ----------------------------------------- Reset@2001, accumulated reward: 66.00, Interoception levels: energy:3.00, water:2.00 happiness: 67.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 45.6 | | time/ | | | fps | 597 | | iterations | 29 | | time_elapsed | 99 | | total_timesteps | 59392 | | train/ | | | approx_kl | 0.007826674 | | clip_fraction | 0.0556 | | clip_range | 0.2 | | entropy_loss | -2.21 | | 
explained_variance | -0.0574 | | learning_rate | 0.0003 | | loss | 0.14 | | n_updates | 280 | | policy_gradient_loss | -0.00381 | | std | 0.725 | | value_loss | 0.28 | ----------------------------------------- Reset@2001, accumulated reward: 73.00, Interoception levels: energy:3.00, water:2.00 happiness: 73.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 46.5 | | time/ | | | fps | 597 | | iterations | 30 | | time_elapsed | 102 | | total_timesteps | 61440 | | train/ | | | approx_kl | 0.002231936 | | clip_fraction | 0.0308 | | clip_range | 0.2 | | entropy_loss | -2.18 | | explained_variance | -0.0312 | | learning_rate | 0.0003 | | loss | 0.301 | | n_updates | 290 | | policy_gradient_loss | 0.000367 | | std | 0.714 | | value_loss | 0.307 | ----------------------------------------- Reset@2001, accumulated reward: 72.00, Interoception levels: energy:3.00, water:2.00 happiness: 72.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 47.3 | | time/ | | | fps | 598 | | iterations | 31 | | time_elapsed | 106 | | total_timesteps | 63488 | | train/ | | | approx_kl | 0.009269917 | | clip_fraction | 0.0855 | | clip_range | 0.2 | | entropy_loss | -2.15 | | explained_variance | 0.0114 | | learning_rate | 0.0003 | | loss | 0.112 | | n_updates | 300 | | policy_gradient_loss | -0.00614 | | std | 0.703 | | value_loss | 0.298 | ----------------------------------------- Reset@2001, accumulated reward: 81.00, Interoception levels: energy:3.00, water:2.00 happiness: 81.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 48.4 | | time/ | | | fps | 599 | | iterations | 32 | | time_elapsed | 109 | | total_timesteps | 65536 | | train/ | | | approx_kl | 0.008870167 | | clip_fraction | 0.0635 | | clip_range | 0.2 | | entropy_loss | -2.13 | | explained_variance | -0.00926 | | learning_rate | 0.0003 | | loss | 0.192 | | n_updates | 310 | | 
policy_gradient_loss | -0.00354 | | std | 0.699 | | value_loss | 0.388 | ----------------------------------------- Reset@2001, accumulated reward: 68.00, Interoception levels: energy:3.00, water:2.00 happiness: 68.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 49 | | time/ | | | fps | 600 | | iterations | 33 | | time_elapsed | 112 | | total_timesteps | 67584 | | train/ | | | approx_kl | 0.010095619 | | clip_fraction | 0.0648 | | clip_range | 0.2 | | entropy_loss | -2.11 | | explained_variance | 0.00128 | | learning_rate | 0.0003 | | loss | 0.292 | | n_updates | 320 | | policy_gradient_loss | -0.003 | | std | 0.693 | | value_loss | 0.32 | ----------------------------------------- Reset@2001, accumulated reward: 82.00, Interoception levels: energy:3.00, water:2.00 happiness: 82.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 49.9 | | time/ | | | fps | 600 | | iterations | 34 | | time_elapsed | 115 | | total_timesteps | 69632 | | train/ | | | approx_kl | 0.0055377763 | | clip_fraction | 0.0332 | | clip_range | 0.2 | | entropy_loss | -2.1 | | explained_variance | -0.0212 | | learning_rate | 0.0003 | | loss | 0.135 | | n_updates | 330 | | policy_gradient_loss | -0.000757 | | std | 0.688 | | value_loss | 0.347 | ------------------------------------------ Reset@2001, accumulated reward: 75.00, Interoception levels: energy:3.00, water:2.00 happiness: 75.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 50.7 | | time/ | | | fps | 601 | | iterations | 35 | | time_elapsed | 119 | | total_timesteps | 71680 | | train/ | | | approx_kl | 0.008326432 | | clip_fraction | 0.0586 | | clip_range | 0.2 | | entropy_loss | -2.09 | | explained_variance | -0.0178 | | learning_rate | 0.0003 | | loss | 0.122 | | n_updates | 340 | | policy_gradient_loss | -0.00415 | | std | 0.686 | | value_loss | 0.388 | 
----------------------------------------- Reset@2001, accumulated reward: 80.00, Interoception levels: energy:3.00, water:2.00 happiness: 80.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 51.5 | | time/ | | | fps | 602 | | iterations | 36 | | time_elapsed | 122 | | total_timesteps | 73728 | | train/ | | | approx_kl | 0.0089393975 | | clip_fraction | 0.0955 | | clip_range | 0.2 | | entropy_loss | -2.07 | | explained_variance | 0.0221 | | learning_rate | 0.0003 | | loss | 0.207 | | n_updates | 350 | | policy_gradient_loss | -0.00787 | | std | 0.675 | | value_loss | 0.345 | ------------------------------------------ Reset@2001, accumulated reward: 87.00, Interoception levels: energy:3.00, water:2.00 happiness: 87.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 52.4 | | time/ | | | fps | 602 | | iterations | 37 | | time_elapsed | 125 | | total_timesteps | 75776 | | train/ | | | approx_kl | 0.003764362 | | clip_fraction | 0.0475 | | clip_range | 0.2 | | entropy_loss | -2.05 | | explained_variance | -0.0342 | | learning_rate | 0.0003 | | loss | 0.171 | | n_updates | 360 | | policy_gradient_loss | -0.00115 | | std | 0.674 | | value_loss | 0.316 | ----------------------------------------- Reset@2001, accumulated reward: 86.00, Interoception levels: energy:3.00, water:2.00 happiness: 86.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 53.3 | | time/ | | | fps | 600 | | iterations | 38 | | time_elapsed | 129 | | total_timesteps | 77824 | | train/ | | | approx_kl | 0.007352999 | | clip_fraction | 0.0334 | | clip_range | 0.2 | | entropy_loss | -2.03 | | explained_variance | 0.00293 | | learning_rate | 0.0003 | | loss | 0.187 | | n_updates | 370 | | policy_gradient_loss | -0.000244 | | std | 0.666 | | value_loss | 0.394 | ----------------------------------------- Reset@2001, accumulated reward: 90.00, Interoception levels: 
energy:3.00, water:2.00 happiness: 91.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 54.3 | | time/ | | | fps | 600 | | iterations | 39 | | time_elapsed | 132 | | total_timesteps | 79872 | | train/ | | | approx_kl | 0.0045955833 | | clip_fraction | 0.054 | | clip_range | 0.2 | | entropy_loss | -2 | | explained_variance | -0.00419 | | learning_rate | 0.0003 | | loss | 0.258 | | n_updates | 380 | | policy_gradient_loss | -0.00238 | | std | 0.651 | | value_loss | 0.501 | ------------------------------------------ Reset@2001, accumulated reward: 92.00, Interoception levels: energy:3.00, water:2.00 happiness: 92.00 ---------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 55.2 | | time/ | | | fps | 601 | | iterations | 40 | | time_elapsed | 136 | | total_timesteps | 81920 | | train/ | | | approx_kl | 0.00479728 | | clip_fraction | 0.0425 | | clip_range | 0.2 | | entropy_loss | -1.97 | | explained_variance | 0.0019 | | learning_rate | 0.0003 | | loss | 0.207 | | n_updates | 390 | | policy_gradient_loss | -0.000864 | | std | 0.646 | | value_loss | 0.482 | ---------------------------------------- Reset@2001, accumulated reward: 92.00, Interoception levels: energy:3.00, water:2.00 happiness: 92.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 56.1 | | time/ | | | fps | 599 | | iterations | 41 | | time_elapsed | 140 | | total_timesteps | 83968 | | train/ | | | approx_kl | 0.004545391 | | clip_fraction | 0.0287 | | clip_range | 0.2 | | entropy_loss | -1.95 | | explained_variance | -0.0206 | | learning_rate | 0.0003 | | loss | 0.318 | | n_updates | 400 | | policy_gradient_loss | -0.00057 | | std | 0.636 | | value_loss | 0.495 | ----------------------------------------- Reset@2001, accumulated reward: 84.00, Interoception levels: energy:3.00, water:2.00 happiness: 84.00 ------------------------------------------ | rollout/ | | | 
ep_len_mean | 2e+03 | | ep_rew_mean | 56.8 | | time/ | | | fps | 598 | | iterations | 42 | | time_elapsed | 143 | | total_timesteps | 86016 | | train/ | | | approx_kl | 0.0034634147 | | clip_fraction | 0.0196 | | clip_range | 0.2 | | entropy_loss | -1.93 | | explained_variance | 0.0309 | | learning_rate | 0.0003 | | loss | 0.184 | | n_updates | 410 | | policy_gradient_loss | 0.000713 | | std | 0.632 | | value_loss | 0.343 | ------------------------------------------ Reset@2001, accumulated reward: 79.00, Interoception levels: energy:3.00, water:2.00 happiness: 79.00 Reset@2001, accumulated reward: 76.00, Interoception levels: energy:3.00, water:2.00 happiness: 77.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 57.7 | | time/ | | | fps | 598 | | iterations | 43 | | time_elapsed | 147 | | total_timesteps | 88064 | | train/ | | | approx_kl | 0.005941964 | | clip_fraction | 0.0428 | | clip_range | 0.2 | | entropy_loss | -1.91 | | explained_variance | 0.0016 | | learning_rate | 0.0003 | | loss | 0.208 | | n_updates | 420 | | policy_gradient_loss | -0.00223 | | std | 0.629 | | value_loss | 0.329 | ----------------------------------------- Reset@2001, accumulated reward: 76.00, Interoception levels: energy:3.00, water:2.00 happiness: 76.00 ---------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 58.1 | | time/ | | | fps | 597 | | iterations | 44 | | time_elapsed | 150 | | total_timesteps | 90112 | | train/ | | | approx_kl | 0.00600801 | | clip_fraction | 0.0623 | | clip_range | 0.2 | | entropy_loss | -1.9 | | explained_variance | 0.0205 | | learning_rate | 0.0003 | | loss | 0.16 | | n_updates | 430 | | policy_gradient_loss | -0.00301 | | std | 0.624 | | value_loss | 0.443 | ---------------------------------------- Reset@2001, accumulated reward: 96.00, Interoception levels: energy:3.00, water:2.00 happiness: 96.00 ----------------------------------------- | rollout/ | | | 
ep_len_mean | 2e+03 | | ep_rew_mean | 58.9 | | time/ | | | fps | 595 | | iterations | 45 | | time_elapsed | 154 | | total_timesteps | 92160 | | train/ | | | approx_kl | 0.008228833 | | clip_fraction | 0.102 | | clip_range | 0.2 | | entropy_loss | -1.89 | | explained_variance | 0.0528 | | learning_rate | 0.0003 | | loss | 0.204 | | n_updates | 440 | | policy_gradient_loss | -0.0062 | | std | 0.623 | | value_loss | 0.365 | ----------------------------------------- Reset@2001, accumulated reward: 93.00, Interoception levels: energy:3.00, water:2.00 happiness: 94.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 59.7 | | time/ | | | fps | 594 | | iterations | 46 | | time_elapsed | 158 | | total_timesteps | 94208 | | train/ | | | approx_kl | 0.006939375 | | clip_fraction | 0.0545 | | clip_range | 0.2 | | entropy_loss | -1.87 | | explained_variance | 0.0447 | | learning_rate | 0.0003 | | loss | 0.256 | | n_updates | 450 | | policy_gradient_loss | -0.00168 | | std | 0.614 | | value_loss | 0.449 | ----------------------------------------- Reset@2001, accumulated reward: 83.00, Interoception levels: energy:3.00, water:2.00 happiness: 84.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 60.1 | | time/ | | | fps | 593 | | iterations | 47 | | time_elapsed | 162 | | total_timesteps | 96256 | | train/ | | | approx_kl | 0.009517329 | | clip_fraction | 0.062 | | clip_range | 0.2 | | entropy_loss | -1.85 | | explained_variance | 0.00185 | | learning_rate | 0.0003 | | loss | 0.328 | | n_updates | 460 | | policy_gradient_loss | -0.000888 | | std | 0.611 | | value_loss | 0.386 | ----------------------------------------- Reset@2001, accumulated reward: 94.00, Interoception levels: energy:3.00, water:2.00 happiness: 94.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 60.8 | | time/ | | | fps | 592 | | iterations | 48 | | time_elapsed | 
166 | | total_timesteps | 98304 | | train/ | | | approx_kl | 0.008446238 | | clip_fraction | 0.0717 | | clip_range | 0.2 | | entropy_loss | -1.86 | | explained_variance | -0.0618 | | learning_rate | 0.0003 | | loss | 0.187 | | n_updates | 470 | | policy_gradient_loss | -0.00414 | | std | 0.615 | | value_loss | 0.391 | ----------------------------------------- Reset@2001, accumulated reward: 106.00, Interoception levels: energy:3.00, water:2.00 happiness: 107.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 61.7 | | time/ | | | fps | 592 | | iterations | 49 | | time_elapsed | 169 | | total_timesteps | 100352 | | train/ | | | approx_kl | 0.005288216 | | clip_fraction | 0.0278 | | clip_range | 0.2 | | entropy_loss | -1.86 | | explained_variance | 0.00939 | | learning_rate | 0.0003 | | loss | 0.323 | | n_updates | 480 | | policy_gradient_loss | -0.000531 | | std | 0.61 | | value_loss | 0.375 | ----------------------------------------- Reset@2001, accumulated reward: 100.00, Interoception levels: energy:3.00, water:2.00 happiness: 100.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 62.5 | | time/ | | | fps | 591 | | iterations | 50 | | time_elapsed | 172 | | total_timesteps | 102400 | | train/ | | | approx_kl | 0.0049711396 | | clip_fraction | 0.0482 | | clip_range | 0.2 | | entropy_loss | -1.84 | | explained_variance | 0.0127 | | learning_rate | 0.0003 | | loss | 0.158 | | n_updates | 490 | | policy_gradient_loss | -0.00148 | | std | 0.603 | | value_loss | 0.467 | ------------------------------------------ Reset@2001, accumulated reward: 96.00, Interoception levels: energy:3.00, water:2.00 happiness: 96.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 63.1 | | time/ | | | fps | 590 | | iterations | 51 | | time_elapsed | 177 | | total_timesteps | 104448 | | train/ | | | approx_kl | 0.0032672083 | | clip_fraction | 
0.0559 | | clip_range | 0.2 | | entropy_loss | -1.83 | | explained_variance | 0.0146 | | learning_rate | 0.0003 | | loss | 0.354 | | n_updates | 500 | | policy_gradient_loss | -0.00389 | | std | 0.605 | | value_loss | 0.467 | ------------------------------------------ Reset@2001, accumulated reward: 92.00, Interoception levels: energy:3.00, water:2.00 happiness: 92.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 63.7 | | time/ | | | fps | 589 | | iterations | 52 | | time_elapsed | 180 | | total_timesteps | 106496 | | train/ | | | approx_kl | 0.0059005236 | | clip_fraction | 0.0522 | | clip_range | 0.2 | | entropy_loss | -1.83 | | explained_variance | 0.0327 | | learning_rate | 0.0003 | | loss | 0.374 | | n_updates | 510 | | policy_gradient_loss | -0.000948 | | std | 0.603 | | value_loss | 0.486 | ------------------------------------------ Reset@2001, accumulated reward: 88.00, Interoception levels: energy:3.00, water:2.00 happiness: 88.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 64.1 | | time/ | | | fps | 589 | | iterations | 53 | | time_elapsed | 184 | | total_timesteps | 108544 | | train/ | | | approx_kl | 0.0017334293 | | clip_fraction | 0.019 | | clip_range | 0.2 | | entropy_loss | -1.82 | | explained_variance | -0.0326 | | learning_rate | 0.0003 | | loss | 0.296 | | n_updates | 520 | | policy_gradient_loss | 0.000271 | | std | 0.602 | | value_loss | 0.348 | ------------------------------------------ Reset@2001, accumulated reward: 89.00, Interoception levels: energy:3.00, water:2.00 happiness: 89.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 64.6 | | time/ | | | fps | 590 | | iterations | 54 | | time_elapsed | 187 | | total_timesteps | 110592 | | train/ | | | approx_kl | 0.0077304686 | | clip_fraction | 0.0544 | | clip_range | 0.2 | | entropy_loss | -1.82 | | explained_variance | 0.0284 | | 
learning_rate | 0.0003 | | loss | 0.188 | | n_updates | 530 | | policy_gradient_loss | -0.00206 | | std | 0.602 | | value_loss | 0.399 | ------------------------------------------ Reset@2001, accumulated reward: 104.00, Interoception levels: energy:3.00, water:2.00 happiness: 104.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 65.3 | | time/ | | | fps | 587 | | iterations | 55 | | time_elapsed | 191 | | total_timesteps | 112640 | | train/ | | | approx_kl | 0.005154536 | | clip_fraction | 0.0495 | | clip_range | 0.2 | | entropy_loss | -1.83 | | explained_variance | -0.0601 | | learning_rate | 0.0003 | | loss | 0.228 | | n_updates | 540 | | policy_gradient_loss | -0.00184 | | std | 0.604 | | value_loss | 0.402 | ----------------------------------------- Reset@2001, accumulated reward: 95.00, Interoception levels: energy:3.00, water:2.00 happiness: 96.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 65.8 | | time/ | | | fps | 587 | | iterations | 56 | | time_elapsed | 195 | | total_timesteps | 114688 | | train/ | | | approx_kl | 0.005725412 | | clip_fraction | 0.0461 | | clip_range | 0.2 | | entropy_loss | -1.82 | | explained_variance | -0.0179 | | learning_rate | 0.0003 | | loss | 0.286 | | n_updates | 550 | | policy_gradient_loss | -0.00149 | | std | 0.599 | | value_loss | 0.442 | ----------------------------------------- Reset@2001, accumulated reward: 86.00, Interoception levels: energy:3.00, water:2.00 happiness: 87.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 66.2 | | time/ | | | fps | 588 | | iterations | 57 | | time_elapsed | 198 | | total_timesteps | 116736 | | train/ | | | approx_kl | 0.0067106243 | | clip_fraction | 0.076 | | clip_range | 0.2 | | entropy_loss | -1.81 | | explained_variance | -0.0147 | | learning_rate | 0.0003 | | loss | 0.0993 | | n_updates | 560 | | policy_gradient_loss | -0.00523 | 
| std | 0.596 | | value_loss | 0.401 | ------------------------------------------ Reset@2001, accumulated reward: 99.00, Interoception levels: energy:3.00, water:2.00 happiness: 99.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 66.7 | | time/ | | | fps | 588 | | iterations | 58 | | time_elapsed | 201 | | total_timesteps | 118784 | | train/ | | | approx_kl | 0.005555094 | | clip_fraction | 0.068 | | clip_range | 0.2 | | entropy_loss | -1.8 | | explained_variance | 0.0114 | | learning_rate | 0.0003 | | loss | 0.168 | | n_updates | 570 | | policy_gradient_loss | -0.00371 | | std | 0.595 | | value_loss | 0.404 | ----------------------------------------- Reset@2001, accumulated reward: 102.00, Interoception levels: energy:3.00, water:2.00 happiness: 102.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 67.3 | | time/ | | | fps | 587 | | iterations | 59 | | time_elapsed | 205 | | total_timesteps | 120832 | | train/ | | | approx_kl | 0.0029288568 | | clip_fraction | 0.0525 | | clip_range | 0.2 | | entropy_loss | -1.78 | | explained_variance | -0.0293 | | learning_rate | 0.0003 | | loss | 0.351 | | n_updates | 580 | | policy_gradient_loss | -0.00394 | | std | 0.584 | | value_loss | 0.524 | ------------------------------------------ Reset@2001, accumulated reward: 95.00, Interoception levels: energy:3.00, water:2.00 happiness: 95.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 67.8 | | time/ | | | fps | 587 | | iterations | 60 | | time_elapsed | 209 | | total_timesteps | 122880 | | train/ | | | approx_kl | 0.009489188 | | clip_fraction | 0.0734 | | clip_range | 0.2 | | entropy_loss | -1.75 | | explained_variance | -0.000798 | | learning_rate | 0.0003 | | loss | 0.313 | | n_updates | 590 | | policy_gradient_loss | -0.00336 | | std | 0.576 | | value_loss | 0.647 | ----------------------------------------- Reset@2001, 
accumulated reward: 97.00, Interoception levels: energy:3.00, water:2.00 happiness: 97.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 68.2 | | time/ | | | fps | 587 | | iterations | 61 | | time_elapsed | 212 | | total_timesteps | 124928 | | train/ | | | approx_kl | 0.008955405 | | clip_fraction | 0.0683 | | clip_range | 0.2 | | entropy_loss | -1.72 | | explained_variance | 0.0155 | | learning_rate | 0.0003 | | loss | 0.186 | | n_updates | 600 | | policy_gradient_loss | -0.00262 | | std | 0.568 | | value_loss | 0.473 | ----------------------------------------- Reset@2001, accumulated reward: 107.00, Interoception levels: energy:3.00, water:2.00 happiness: 107.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 68.8 | | time/ | | | fps | 587 | | iterations | 62 | | time_elapsed | 216 | | total_timesteps | 126976 | | train/ | | | approx_kl | 0.0045913523 | | clip_fraction | 0.0676 | | clip_range | 0.2 | | entropy_loss | -1.72 | | explained_variance | -0.0401 | | learning_rate | 0.0003 | | loss | 0.161 | | n_updates | 610 | | policy_gradient_loss | -0.00325 | | std | 0.572 | | value_loss | 0.402 | ------------------------------------------ Reset@2001, accumulated reward: 75.00, Interoception levels: energy:3.00, water:2.00 happiness: 75.00 ------------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 68.9 | | time/ | | | fps | 586 | | iterations | 63 | | time_elapsed | 219 | | total_timesteps | 129024 | | train/ | | | approx_kl | 0.00043652696 | | clip_fraction | 0.035 | | clip_range | 0.2 | | entropy_loss | -1.72 | | explained_variance | 0.0109 | | learning_rate | 0.0003 | | loss | 0.201 | | n_updates | 620 | | policy_gradient_loss | -0.000524 | | std | 0.57 | | value_loss | 0.573 | ------------------------------------------- Reset@2001, accumulated reward: 85.00, Interoception levels: energy:3.00, water:2.00 happiness: 85.00 
----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 69.2 | | time/ | | | fps | 587 | | iterations | 64 | | time_elapsed | 223 | | total_timesteps | 131072 | | train/ | | | approx_kl | 0.009251371 | | clip_fraction | 0.0791 | | clip_range | 0.2 | | entropy_loss | -1.7 | | explained_variance | 0.0106 | | learning_rate | 0.0003 | | loss | 0.158 | | n_updates | 630 | | policy_gradient_loss | -0.00464 | | std | 0.563 | | value_loss | 0.477 | ----------------------------------------- Reset@2001, accumulated reward: 106.00, Interoception levels: energy:3.00, water:2.00 happiness: 106.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 69.7 | | time/ | | | fps | 587 | | iterations | 65 | | time_elapsed | 226 | | total_timesteps | 133120 | | train/ | | | approx_kl | 0.006205908 | | clip_fraction | 0.0405 | | clip_range | 0.2 | | entropy_loss | -1.68 | | explained_variance | -0.0211 | | learning_rate | 0.0003 | | loss | 0.433 | | n_updates | 640 | | policy_gradient_loss | -0.000916 | | std | 0.556 | | value_loss | 0.534 | ----------------------------------------- Reset@2001, accumulated reward: 95.00, Interoception levels: energy:3.00, water:2.00 happiness: 95.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 70.1 | | time/ | | | fps | 585 | | iterations | 66 | | time_elapsed | 230 | | total_timesteps | 135168 | | train/ | | | approx_kl | 0.0031221597 | | clip_fraction | 0.0755 | | clip_range | 0.2 | | entropy_loss | -1.66 | | explained_variance | 0.00471 | | learning_rate | 0.0003 | | loss | 0.471 | | n_updates | 650 | | policy_gradient_loss | -0.00383 | | std | 0.554 | | value_loss | 0.519 | ------------------------------------------ Reset@2001, accumulated reward: 97.00, Interoception levels: energy:3.00, water:2.00 happiness: 98.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 
70.5 | | time/ | | | fps | 586 | | iterations | 67 | | time_elapsed | 234 | | total_timesteps | 137216 | | train/ | | | approx_kl | 0.007189829 | | clip_fraction | 0.061 | | clip_range | 0.2 | | entropy_loss | -1.65 | | explained_variance | -0.00399 | | learning_rate | 0.0003 | | loss | 0.273 | | n_updates | 660 | | policy_gradient_loss | -0.00176 | | std | 0.549 | | value_loss | 0.482 | ----------------------------------------- Reset@2001, accumulated reward: 77.00, Interoception levels: energy:3.00, water:2.00 happiness: 77.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 70.6 | | time/ | | | fps | 586 | | iterations | 68 | | time_elapsed | 237 | | total_timesteps | 139264 | | train/ | | | approx_kl | 0.006149846 | | clip_fraction | 0.065 | | clip_range | 0.2 | | entropy_loss | -1.62 | | explained_variance | 0.00314 | | learning_rate | 0.0003 | | loss | 0.174 | | n_updates | 670 | | policy_gradient_loss | -0.00133 | | std | 0.541 | | value_loss | 0.425 | ----------------------------------------- Reset@2001, accumulated reward: 101.00, Interoception levels: energy:3.00, water:2.00 happiness: 101.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 71 | | time/ | | | fps | 586 | | iterations | 69 | | time_elapsed | 240 | | total_timesteps | 141312 | | train/ | | | approx_kl | 0.010984565 | | clip_fraction | 0.0814 | | clip_range | 0.2 | | entropy_loss | -1.59 | | explained_variance | 0.0196 | | learning_rate | 0.0003 | | loss | 0.279 | | n_updates | 680 | | policy_gradient_loss | -0.00518 | | std | 0.531 | | value_loss | 0.421 | ----------------------------------------- Reset@2001, accumulated reward: 85.00, Interoception levels: energy:3.00, water:2.00 happiness: 85.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 71.2 | | time/ | | | fps | 586 | | iterations | 70 | | time_elapsed | 244 | | total_timesteps | 143360 | 
| train/ | | | approx_kl | 0.0068652863 | | clip_fraction | 0.0449 | | clip_range | 0.2 | | entropy_loss | -1.58 | | explained_variance | -0.0247 | | learning_rate | 0.0003 | | loss | 0.197 | | n_updates | 690 | | policy_gradient_loss | -0.000948 | | std | 0.532 | | value_loss | 0.496 | ------------------------------------------ Reset@2001, accumulated reward: 114.00, Interoception levels: energy:3.00, water:2.00 happiness: 114.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 71.8 | | time/ | | | fps | 586 | | iterations | 71 | | time_elapsed | 248 | | total_timesteps | 145408 | | train/ | | | approx_kl | 0.009303605 | | clip_fraction | 0.0797 | | clip_range | 0.2 | | entropy_loss | -1.56 | | explained_variance | 0.0366 | | learning_rate | 0.0003 | | loss | 0.163 | | n_updates | 700 | | policy_gradient_loss | -0.00297 | | std | 0.526 | | value_loss | 0.392 | ----------------------------------------- Reset@2001, accumulated reward: 96.00, Interoception levels: energy:3.00, water:2.00 happiness: 96.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 72.2 | | time/ | | | fps | 586 | | iterations | 72 | | time_elapsed | 251 | | total_timesteps | 147456 | | train/ | | | approx_kl | 0.0040140115 | | clip_fraction | 0.0443 | | clip_range | 0.2 | | entropy_loss | -1.55 | | explained_variance | -0.0404 | | learning_rate | 0.0003 | | loss | 0.178 | | n_updates | 710 | | policy_gradient_loss | 0.000315 | | std | 0.524 | | value_loss | 0.542 | ------------------------------------------ Reset@2001, accumulated reward: 110.00, Interoception levels: energy:3.00, water:2.00 happiness: 110.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 72.7 | | time/ | | | fps | 586 | | iterations | 73 | | time_elapsed | 254 | | total_timesteps | 149504 | | train/ | | | approx_kl | 0.0032575822 | | clip_fraction | 0.0591 | | clip_range | 0.2 | | 
entropy_loss | -1.54 | | explained_variance | -0.00723 | | learning_rate | 0.0003 | | loss | 0.246 | | n_updates | 720 | | policy_gradient_loss | -0.00175 | | std | 0.519 | | value_loss | 0.433 | ------------------------------------------ Reset@2001, accumulated reward: 80.00, Interoception levels: energy:3.00, water:2.00 happiness: 80.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 72.8 | | time/ | | | fps | 587 | | iterations | 74 | | time_elapsed | 258 | | total_timesteps | 151552 | | train/ | | | approx_kl | 0.009227354 | | clip_fraction | 0.0773 | | clip_range | 0.2 | | entropy_loss | -1.51 | | explained_variance | -0.0146 | | learning_rate | 0.0003 | | loss | 0.213 | | n_updates | 730 | | policy_gradient_loss | -0.00319 | | std | 0.511 | | value_loss | 0.406 | ----------------------------------------- Reset@2001, accumulated reward: 106.00, Interoception levels: energy:3.00, water:2.00 happiness: 106.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 73.2 | | time/ | | | fps | 587 | | iterations | 75 | | time_elapsed | 261 | | total_timesteps | 153600 | | train/ | | | approx_kl | 0.0059727877 | | clip_fraction | 0.0494 | | clip_range | 0.2 | | entropy_loss | -1.49 | | explained_variance | -0.0283 | | learning_rate | 0.0003 | | loss | 0.22 | | n_updates | 740 | | policy_gradient_loss | -0.000741 | | std | 0.508 | | value_loss | 0.486 | ------------------------------------------ Reset@2001, accumulated reward: 95.00, Interoception levels: energy:3.00, water:2.00 happiness: 96.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 73.5 | | time/ | | | fps | 588 | | iterations | 76 | | time_elapsed | 264 | | total_timesteps | 155648 | | train/ | | | approx_kl | 0.006471106 | | clip_fraction | 0.0536 | | clip_range | 0.2 | | entropy_loss | -1.48 | | explained_variance | -0.00249 | | learning_rate | 0.0003 | | loss | 
0.15 | | n_updates | 750 | | policy_gradient_loss | -0.00128 | | std | 0.506 | | value_loss | 0.431 | ----------------------------------------- Reset@2001, accumulated reward: 97.00, Interoception levels: energy:3.00, water:2.00 happiness: 97.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 73.8 | | time/ | | | fps | 588 | | iterations | 77 | | time_elapsed | 267 | | total_timesteps | 157696 | | train/ | | | approx_kl | 0.009507492 | | clip_fraction | 0.0508 | | clip_range | 0.2 | | entropy_loss | -1.48 | | explained_variance | 0.00147 | | learning_rate | 0.0003 | | loss | 0.142 | | n_updates | 760 | | policy_gradient_loss | -0.000401 | | std | 0.507 | | value_loss | 0.373 | ----------------------------------------- Reset@2001, accumulated reward: 96.00, Interoception levels: energy:3.00, water:2.00 happiness: 96.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 74.1 | | time/ | | | fps | 589 | | iterations | 78 | | time_elapsed | 271 | | total_timesteps | 159744 | | train/ | | | approx_kl | 0.0050100638 | | clip_fraction | 0.0677 | | clip_range | 0.2 | | entropy_loss | -1.48 | | explained_variance | -0.00271 | | learning_rate | 0.0003 | | loss | 0.166 | | n_updates | 770 | | policy_gradient_loss | -0.00271 | | std | 0.506 | | value_loss | 0.479 | ------------------------------------------ Reset@2001, accumulated reward: 93.00, Interoception levels: energy:3.00, water:2.00 happiness: 93.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 74.3 | | time/ | | | fps | 589 | | iterations | 79 | | time_elapsed | 274 | | total_timesteps | 161792 | | train/ | | | approx_kl | 0.004497858 | | clip_fraction | 0.0655 | | clip_range | 0.2 | | entropy_loss | -1.47 | | explained_variance | 0.0148 | | learning_rate | 0.0003 | | loss | 0.0871 | | n_updates | 780 | | policy_gradient_loss | -0.00265 | | std | 0.504 | | value_loss | 
0.365 | ----------------------------------------- Reset@2001, accumulated reward: 108.00, Interoception levels: energy:3.00, water:2.00 happiness: 108.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 74.7 | | time/ | | | fps | 589 | | iterations | 80 | | time_elapsed | 277 | | total_timesteps | 163840 | | train/ | | | approx_kl | 0.0056636995 | | clip_fraction | 0.0477 | | clip_range | 0.2 | | entropy_loss | -1.47 | | explained_variance | 0.0267 | | learning_rate | 0.0003 | | loss | 0.265 | | n_updates | 790 | | policy_gradient_loss | -0.000368 | | std | 0.505 | | value_loss | 0.488 | ------------------------------------------ Reset@2001, accumulated reward: 103.00, Interoception levels: energy:3.00, water:2.00 happiness: 104.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 75.1 | | time/ | | | fps | 589 | | iterations | 81 | | time_elapsed | 281 | | total_timesteps | 165888 | | train/ | | | approx_kl | 0.011304654 | | clip_fraction | 0.0471 | | clip_range | 0.2 | | entropy_loss | -1.48 | | explained_variance | -0.0732 | | learning_rate | 0.0003 | | loss | 0.237 | | n_updates | 800 | | policy_gradient_loss | -0.00052 | | std | 0.509 | | value_loss | 0.507 | ----------------------------------------- Reset@2001, accumulated reward: 104.00, Interoception levels: energy:3.00, water:2.00 happiness: 104.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 75.4 | | time/ | | | fps | 589 | | iterations | 82 | | time_elapsed | 284 | | total_timesteps | 167936 | | train/ | | | approx_kl | 0.0047630738 | | clip_fraction | 0.0759 | | clip_range | 0.2 | | entropy_loss | -1.46 | | explained_variance | -0.00688 | | learning_rate | 0.0003 | | loss | 0.206 | | n_updates | 810 | | policy_gradient_loss | -0.00251 | | std | 0.496 | | value_loss | 0.488 | ------------------------------------------ Reset@2001, accumulated reward: 105.00, 
Interoception levels: energy:3.00, water:2.00 happiness: 105.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 75.8 | | time/ | | | fps | 589 | | iterations | 83 | | time_elapsed | 288 | | total_timesteps | 169984 | | train/ | | | approx_kl | 0.0062959776 | | clip_fraction | 0.0617 | | clip_range | 0.2 | | entropy_loss | -1.42 | | explained_variance | 0.00612 | | learning_rate | 0.0003 | | loss | 0.0955 | | n_updates | 820 | | policy_gradient_loss | -0.00358 | | std | 0.489 | | value_loss | 0.535 | ------------------------------------------ Reset@2001, accumulated reward: 84.00, Interoception levels: energy:3.00, water:2.00 happiness: 84.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 75.9 | | time/ | | | fps | 589 | | iterations | 84 | | time_elapsed | 291 | | total_timesteps | 172032 | | train/ | | | approx_kl | 0.004638277 | | clip_fraction | 0.0344 | | clip_range | 0.2 | | entropy_loss | -1.39 | | explained_variance | -0.0104 | | learning_rate | 0.0003 | | loss | 0.173 | | n_updates | 830 | | policy_gradient_loss | 0.000808 | | std | 0.481 | | value_loss | 0.426 | ----------------------------------------- Reset@2001, accumulated reward: 103.00, Interoception levels: energy:3.00, water:2.00 happiness: 103.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 76.2 | | time/ | | | fps | 588 | | iterations | 85 | | time_elapsed | 295 | | total_timesteps | 174080 | | train/ | | | approx_kl | 0.008157315 | | clip_fraction | 0.0506 | | clip_range | 0.2 | | entropy_loss | -1.38 | | explained_variance | 0.00721 | | learning_rate | 0.0003 | | loss | 0.197 | | n_updates | 840 | | policy_gradient_loss | -0.000936 | | std | 0.485 | | value_loss | 0.422 | ----------------------------------------- Reset@2001, accumulated reward: 93.00, Interoception levels: energy:3.00, water:2.00 happiness: 93.00 Reset@2001, accumulated reward: 
95.00, Interoception levels: energy:3.00, water:2.00 happiness: 96.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 76.6 | | time/ | | | fps | 588 | | iterations | 86 | | time_elapsed | 299 | | total_timesteps | 176128 | | train/ | | | approx_kl | 0.004514564 | | clip_fraction | 0.0564 | | clip_range | 0.2 | | entropy_loss | -1.38 | | explained_variance | -0.0473 | | learning_rate | 0.0003 | | loss | 0.318 | | n_updates | 850 | | policy_gradient_loss | -0.00107 | | std | 0.484 | | value_loss | 0.422 | ----------------------------------------- Reset@2001, accumulated reward: 87.00, Interoception levels: energy:3.00, water:2.00 happiness: 88.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 76.7 | | time/ | | | fps | 589 | | iterations | 87 | | time_elapsed | 302 | | total_timesteps | 178176 | | train/ | | | approx_kl | 0.0075806417 | | clip_fraction | 0.0595 | | clip_range | 0.2 | | entropy_loss | -1.39 | | explained_variance | 0.0137 | | learning_rate | 0.0003 | | loss | 0.138 | | n_updates | 860 | | policy_gradient_loss | -0.00151 | | std | 0.488 | | value_loss | 0.391 | ------------------------------------------ Reset@2001, accumulated reward: 103.00, Interoception levels: energy:3.00, water:2.00 happiness: 103.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 77 | | time/ | | | fps | 589 | | iterations | 88 | | time_elapsed | 305 | | total_timesteps | 180224 | | train/ | | | approx_kl | 0.0062497417 | | clip_fraction | 0.0421 | | clip_range | 0.2 | | entropy_loss | -1.4 | | explained_variance | -0.0431 | | learning_rate | 0.0003 | | loss | 0.201 | | n_updates | 870 | | policy_gradient_loss | 0.000914 | | std | 0.486 | | value_loss | 0.437 | ------------------------------------------ Reset@2001, accumulated reward: 116.00, Interoception levels: energy:3.00, water:2.00 happiness: 116.00 
------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 77.4 | | time/ | | | fps | 589 | | iterations | 89 | | time_elapsed | 309 | | total_timesteps | 182272 | | train/ | | | approx_kl | 0.0020123282 | | clip_fraction | 0.0527 | | clip_range | 0.2 | | entropy_loss | -1.38 | | explained_variance | 0.0105 | | learning_rate | 0.0003 | | loss | 0.209 | | n_updates | 880 | | policy_gradient_loss | -0.000518 | | std | 0.477 | | value_loss | 0.374 | ------------------------------------------ Reset@2001, accumulated reward: 90.00, Interoception levels: energy:3.00, water:2.00 happiness: 90.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 77.6 | | time/ | | | fps | 589 | | iterations | 90 | | time_elapsed | 312 | | total_timesteps | 184320 | | train/ | | | approx_kl | 0.0039226096 | | clip_fraction | 0.0677 | | clip_range | 0.2 | | entropy_loss | -1.34 | | explained_variance | -0.0244 | | learning_rate | 0.0003 | | loss | 0.283 | | n_updates | 890 | | policy_gradient_loss | -0.00188 | | std | 0.468 | | value_loss | 0.663 | ------------------------------------------ Reset@2001, accumulated reward: 95.00, Interoception levels: energy:3.00, water:2.00 happiness: 95.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 77.8 | | time/ | | | fps | 589 | | iterations | 91 | | time_elapsed | 316 | | total_timesteps | 186368 | | train/ | | | approx_kl | 0.011078471 | | clip_fraction | 0.095 | | clip_range | 0.2 | | entropy_loss | -1.3 | | explained_variance | 0.0162 | | learning_rate | 0.0003 | | loss | 0.152 | | n_updates | 900 | | policy_gradient_loss | -0.00441 | | std | 0.462 | | value_loss | 0.424 | ----------------------------------------- Reset@2001, accumulated reward: 99.00, Interoception levels: energy:3.00, water:2.00 happiness: 100.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 
78 | | time/ | | | fps | 588 | | iterations | 92 | | time_elapsed | 319 | | total_timesteps | 188416 | | train/ | | | approx_kl | 0.0066265306 | | clip_fraction | 0.0685 | | clip_range | 0.2 | | entropy_loss | -1.28 | | explained_variance | -0.0587 | | learning_rate | 0.0003 | | loss | 0.136 | | n_updates | 910 | | policy_gradient_loss | -0.00111 | | std | 0.458 | | value_loss | 0.408 | ------------------------------------------ Reset@2001, accumulated reward: 92.00, Interoception levels: energy:3.00, water:2.00 happiness: 92.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 78.1 | | time/ | | | fps | 588 | | iterations | 93 | | time_elapsed | 323 | | total_timesteps | 190464 | | train/ | | | approx_kl | 0.0053549223 | | clip_fraction | 0.0676 | | clip_range | 0.2 | | entropy_loss | -1.27 | | explained_variance | -0.00552 | | learning_rate | 0.0003 | | loss | 0.148 | | n_updates | 920 | | policy_gradient_loss | -0.00248 | | std | 0.457 | | value_loss | 0.43 | ------------------------------------------ Reset@2001, accumulated reward: 91.00, Interoception levels: energy:3.00, water:2.00 happiness: 92.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 78.3 | | time/ | | | fps | 589 | | iterations | 94 | | time_elapsed | 326 | | total_timesteps | 192512 | | train/ | | | approx_kl | 0.009860445 | | clip_fraction | 0.084 | | clip_range | 0.2 | | entropy_loss | -1.26 | | explained_variance | -0.0176 | | learning_rate | 0.0003 | | loss | 0.249 | | n_updates | 930 | | policy_gradient_loss | -0.00386 | | std | 0.452 | | value_loss | 0.385 | ----------------------------------------- Reset@2001, accumulated reward: 98.00, Interoception levels: energy:3.00, water:2.00 happiness: 98.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 78.5 | | time/ | | | fps | 588 | | iterations | 95 | | time_elapsed | 330 | | total_timesteps | 
194560 | | train/ | | | approx_kl | 0.0030130236 | | clip_fraction | 0.0481 | | clip_range | 0.2 | | entropy_loss | -1.24 | | explained_variance | -0.00524 | | learning_rate | 0.0003 | | loss | 0.161 | | n_updates | 940 | | policy_gradient_loss | -0.00118 | | std | 0.447 | | value_loss | 0.339 | ------------------------------------------ Reset@2001, accumulated reward: 108.00, Interoception levels: energy:3.00, water:2.00 happiness: 108.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 78.8 | | time/ | | | fps | 589 | | iterations | 96 | | time_elapsed | 333 | | total_timesteps | 196608 | | train/ | | | approx_kl | 0.0058927145 | | clip_fraction | 0.0613 | | clip_range | 0.2 | | entropy_loss | -1.22 | | explained_variance | -0.00059 | | learning_rate | 0.0003 | | loss | 0.16 | | n_updates | 950 | | policy_gradient_loss | -0.000233 | | std | 0.443 | | value_loss | 0.423 | ------------------------------------------ Reset@2001, accumulated reward: 94.00, Interoception levels: energy:3.00, water:2.00 happiness: 94.00 ------------------------------------------ | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 78.9 | | time/ | | | fps | 589 | | iterations | 97 | | time_elapsed | 336 | | total_timesteps | 198656 | | train/ | | | approx_kl | 0.0090521695 | | clip_fraction | 0.0713 | | clip_range | 0.2 | | entropy_loss | -1.2 | | explained_variance | 0.0516 | | learning_rate | 0.0003 | | loss | 0.161 | | n_updates | 960 | | policy_gradient_loss | -0.00137 | | std | 0.441 | | value_loss | 0.449 | ------------------------------------------ Reset@2001, accumulated reward: 110.00, Interoception levels: energy:3.00, water:2.00 happiness: 110.00 ----------------------------------------- | rollout/ | | | ep_len_mean | 2e+03 | | ep_rew_mean | 79.2 | | time/ | | | fps | 589 | | iterations | 98 | | time_elapsed | 340 | | total_timesteps | 200704 | | train/ | | | approx_kl | 0.005115848 | | clip_fraction | 0.0488 | | clip_range | 
0.2 | | entropy_loss | -1.19 | | explained_variance | 0.0296 | | learning_rate | 0.0003 | | loss | 0.245 | | n_updates | 970 | | policy_gradient_loss | -0.000638 | | std | 0.437 | | value_loss | 0.514 | ----------------------------------------- Reset@604, accumulated reward: 30.00, Interoception levels: energy:3.00, water:2.00 happiness: 30.00 Reset@2001, accumulated reward: 105.00, Interoception levels: energy:3.00, water:2.00 happiness: 105.00 Reset@2001, accumulated reward: 96.00, Interoception levels: energy:3.00, water:2.00 happiness: 96.00 Reset@2001, accumulated reward: 92.00, Interoception levels: energy:3.00, water:2.00 happiness: 92.00 Reset@2001, accumulated reward: 94.00, Interoception levels: energy:3.00, water:2.00 happiness: 94.00 Reset@2001, accumulated reward: 112.00, Interoception levels: energy:3.00, water:2.00 happiness: 112.00 Reset@2001, accumulated reward: 104.00, Interoception levels: energy:3.00, water:2.00 happiness: 104.00 Reset@2001, accumulated reward: 94.00, Interoception levels: energy:3.00, water:2.00 happiness: 94.00 Reset@2001, accumulated reward: 74.00, Interoception levels: energy:3.00, water:2.00 happiness: 75.00 Reset@2001, accumulated reward: 111.00, Interoception levels: energy:3.00, water:2.00 happiness: 111.00 Reset@2001, accumulated reward: 90.00, Interoception levels: energy:3.00, water:2.00 happiness: 90.00 Mean reward: 97.2, std. dev.: 10.74988372030135
%%capture
# Animate the trained PPO policy in the single-dandelion world.
# %%capture suppresses the cell's inline output; the video is rendered
# from `anim` in a later cell via HTML(...).
a = AnimationHelper(env, model)
a.init_animation();
# 2000 frames at 50 ms each; blit=True redraws only changed artists.
anim = animation.FuncAnimation(a.fig, a.drawframe, frames=2000, interval=50, blit=True)
HTML(anim.to_html5_video())
Done @1998, 2001, 0 Reset@2001, accumulated reward: 91.00, Interoception levels: energy:3.00, water:2.00 happiness: 91.00
def world4():
    """Build an EcoEnv with a MultiSheep agent that has two needs.

    The terrain holds 20 dandelions and 20 water sources. Moving drains a
    little of both needs; consuming the matching object refills one of them
    (see the nutrition table below).
    """
    # Pass explicit coordinate arrays instead of counts to fix the layout,
    # e.g. {"dandelion": 20 * (np.random.random((20, 2)) - .5), ...}.
    terrain = Terrain(objects={"dandelion": 20, "water": 20})

    sheep = MultiSheep(
        hyperparameters={"max_age": 2000, "delta": 0.1, "close": 0.5},
        # Perception range per object type (28 was used in earlier runs).
        distances={"dandelion": 5, "water": 5},
        interoception={"energy": 3, "water": 3},
        use_interoception_as_obs=True,
        use_intensity_as_obs=True,
        use_multi_direction=True,
        use_single_direction=True,
        use_happiness_as_obs=True,
        use_reward_radius_in_perception=True,
        normalize_action=False,
        # Nutrition table: how each (action, object) pair changes the needs.
        homeostatic_effects={
            ("move", None): {"energy": -0.01, "water": -0.01},
            ("consume", "water"): {"energy": 0, "water": 1},
            ("consume", "dandelion"): {"energy": 1, "water": 0.0},
        },
    )
    return EcoEnv(Ecosystem(terrain, sheep))
from collections import defaultdict
from ecotwins.utility import normalize
class RandomAgent:
    """Baseline agent that moves in a uniformly random direction every step.

    It mimics the stable-baselines3 ``predict`` interface (returning an
    ``(action, state)`` pair) so it can be plugged into the same helpers as
    trained models, e.g. AnimationHelper.
    """

    def __init__(self, eco, n_dir=2):
        """Remember the environment and snapshot its interoception levels.

        eco   -- environment exposing ``agent``, ``reset()`` and ``step()``
        n_dir -- number of movement directions (kept for interface parity)
        """
        self.eco = eco
        self.cur_dir = 0
        self.max_dir = n_dir
        # Used to determine when to change direction. The direction is changed
        # when we have found/consumed an object.
        # Bug fix: the original read the module-level `env` here; use the
        # `eco` argument so the agent is tied to the environment it was
        # constructed with.
        self.e_levels = self.eco.agent.interoception.copy()

    def predict(self, observation, **kwargs):
        """Return (action, None); None mimics stable-baselines3's state."""
        action = normalize(np.random.random((2)) - 0.5)
        return action, None

    def episode(self, n_step=None):
        """Run one episode of at most `n_step` steps (unbounded when None)."""
        # Bug fix: the original evaluated the undefined name `n_steps` on the
        # right-hand side (`... else n_steps`), raising NameError whenever a
        # step limit was supplied.
        n_steps = np.iinfo(int).max if n_step is None else n_step
        obs = self.eco.reset()
        for i in range(n_steps):
            action, _ = self.predict(obs)
            obs, reward, done, _ = self.eco.step(action)
            if done:
                break
%%capture
# Sanity check: animate the random baseline agent in a fresh world4.
env = world4()
# Larger step size so random motion covers more ground per frame.
env.agent.hyperparameters['delta'] = 1
model = RandomAgent(env)
a = AnimationHelper(env, model)
a.init_animation();
anim = animation.FuncAnimation(a.fig, a.drawframe, frames=200, interval=50, blit=True)
HTML(anim.to_html5_video())
%%capture
# Second baseline: the hand-coded ReflexAgent with 4 movement directions.
env = world4()
model = ReflexAgent(env, n_dir=4)
a = AnimationHelper(env, model)
a.init_animation();
anim = animation.FuncAnimation(a.fig, a.drawframe, frames=200, interval=50, blit=True)
HTML(anim.to_html5_video())
# Train an A2C model on world4, vectorized over num_cpu copies of the env.
# Set extend_training=True to continue training a `model` from a previous run.
extend_training = False
# NOTE(review): this single env is immediately replaced by the vectorized
# env below, so the assignment looks dead — confirm whether it is needed.
env = world4()
num_cpu = 4
env = DummyVecEnv([world4 for i in range(num_cpu)])
device = 'cpu'
if not extend_training:
    # Instantiate the agent
    model = A2C('MlpPolicy', env, verbose=1, use_sde=True, device=device)
    # model = TD3('MlpPolicy', env, verbose=1, device='cuda')
else:
    # Keep the previously trained weights, just swap in the new env.
    model.set_env(env)
# Train the model
model.learn(total_timesteps=2e5, log_interval=400)
# model.learn(total_timesteps=2e5)
# Evaluate the model on a fresh (non-vectorized) world4 instance.
mean_reward, std_reward = evaluate_policy(model, world4(), n_eval_episodes=10)
print(f'Mean reward: {mean_reward:.2f}, std. dev.: {std_reward:.2f}')
# Save the model
# model.save("multi_sheep_champion.pth")
# Load the model
# model = PPO.load("multi_sheep_champion.pth")
/home/niklas/miniconda3/envs/gym/lib/python3.9/site-packages/gym/logger.py:30: UserWarning: WARN: Box bound precision lowered by casting to float32
warnings.warn(colorize('%s: %s'%('WARN', msg % args), 'yellow'))
/home/niklas/miniconda3/envs/gym/lib/python3.9/site-packages/torch/autograd/__init__.py:145: UserWarning: CUDA initialization: CUDA unknown error - this may be due to an incorrectly set up environment, e.g. changing env variable CUDA_VISIBLE_DEVICES after program start. Setting the available devices to be zero. (Triggered internally at /opt/conda/conda-bld/pytorch_1616554798336/work/c10/cuda/CUDAFunctions.cpp:109.)
Variable._execution_engine.run_backward(
Using cpu device Reset@0, accumulated reward: 0.00, Interoception levels: energy:3.00, water:3.00 happiness: 9.00 Reset@0, accumulated reward: 0.00, Interoception levels: energy:3.00, water:3.00 happiness: 9.00 Reset@0, accumulated reward: 0.00, Interoception levels: energy:3.00, water:3.00 happiness: 9.00 Reset@0, accumulated reward: 0.00, Interoception levels: energy:3.00, water:3.00 happiness: 9.00 Reset@300, accumulated reward: -10.65, Interoception levels: energy:3.00, water:0.00 happiness: 0.00 Reset@300, accumulated reward: -10.65, Interoception levels: energy:0.00, water:0.00 happiness: 0.00 Reset@300, accumulated reward: -10.65, Interoception levels: energy:0.00, water:1.00 happiness: 0.00 Reset@500, accumulated reward: -9.75, Interoception levels: energy:0.00, water:1.00 happiness: 0.00 Reset@300, accumulated reward: -10.65, Interoception levels: energy:0.00, water:0.00 happiness: 0.00 Reset@300, accumulated reward: -10.65, Interoception levels: energy:0.00, water:0.00 happiness: 0.00 Reset@300, accumulated reward: -10.65, Interoception levels: energy:1.00, water:0.00 happiness: 0.00 Reset@400, accumulated reward: -10.20, Interoception levels: energy:0.00, water:1.00 happiness: 0.00 Reset@300, accumulated reward: -10.65, Interoception levels: energy:2.00, water:0.00 happiness: 0.00 Reset@400, accumulated reward: -10.20, Interoception levels: energy:0.00, water:0.00 happiness: 0.00 Reset@500, accumulated reward: -9.75, Interoception levels: energy:0.00, water:1.00 happiness: 0.00 Reset@300, accumulated reward: -10.65, Interoception levels: energy:0.00, water:0.00 happiness: 0.00 Reset@400, accumulated reward: -10.20, Interoception levels: energy:0.00, water:0.00 happiness: 0.00 Reset@400, accumulated reward: -10.20, Interoception levels: energy:2.00, water:0.00 happiness: 0.00 Reset@500, accumulated reward: -9.75, Interoception levels: energy:0.00, water:0.00 happiness: 0.00 Reset@300, accumulated reward: -10.65, Interoception levels: energy:0.00, 
water:0.00 happiness: 0.00 Reset@400, accumulated reward: -10.20, Interoception levels: energy:0.00, water:2.00 happiness: 0.00 Reset@300, accumulated reward: -10.65, Interoception levels: energy:1.00, water:0.00 happiness: 0.00 ------------------------------------ | time/ | | | fps | 295 | | iterations | 400 | | time_elapsed | 27 | | total_timesteps | 8000 | | train/ | | | entropy_loss | -6.22 | | explained_variance | 0.995 | | learning_rate | 0.0007 | | n_updates | 399 | | policy_loss | 0.0111 | | std | 1.02 | | value_loss | 0.00455 | ------------------------------------ Reset@600, accumulated reward: -9.30, Interoception levels: energy:2.00, water:0.00 happiness: 0.00 Reset@500, accumulated reward: -9.75, Interoception levels: energy:0.00, water:0.00 happiness: 0.00 Reset@800, accumulated reward: -8.40, Interoception levels: energy:0.00, water:0.00 happiness: 0.00 Reset@700, accumulated reward: -8.85, Interoception levels: energy:0.00, water:2.00 happiness: 0.00 Reset@400, accumulated reward: -10.20, Interoception levels: energy:0.00, water:3.00 happiness: 0.00 Reset@400, accumulated reward: -10.20, Interoception levels: energy:0.00, water:2.00 happiness: 0.00 Reset@800, accumulated reward: -8.40, Interoception levels: energy:0.00, water:9.00 happiness: 0.00 Reset@700, accumulated reward: -8.85, Interoception levels: energy:2.00, water:0.00 happiness: 0.00 Reset@600, accumulated reward: -9.30, Interoception levels: energy:0.00, water:5.00 happiness: 0.00 ------------------------------------ | time/ | | | fps | 303 | | iterations | 800 | | time_elapsed | 52 | | total_timesteps | 16000 | | train/ | | | entropy_loss | -5.89 | | explained_variance | 0.976 | | learning_rate | 0.0007 | | n_updates | 799 | | policy_loss | -0.516 | | std | 0.986 | | value_loss | 0.0895 | ------------------------------------ Reset@2001, accumulated reward: 41.87, Interoception levels: energy:6.99, water:5.99 happiness: 41.87 Reset@1400, accumulated reward: -5.70, Interoception levels: 
energy:6.00, water:0.00 happiness: 0.00 Reset@2001, accumulated reward: 47.86, Interoception levels: energy:5.99, water:7.99 happiness: 47.86 Reset@600, accumulated reward: -9.30, Interoception levels: energy:3.00, water:0.00 happiness: 0.00 Reset@2001, accumulated reward: 27.89, Interoception levels: energy:6.99, water:3.99 happiness: 27.89 Reset@600, accumulated reward: -9.30, Interoception levels: energy:11.00, water:0.00 happiness: 0.00 ------------------------------------ | time/ | | | fps | 309 | | iterations | 1200 | | time_elapsed | 77 | | total_timesteps | 24000 | | train/ | | | entropy_loss | -5.98 | | explained_variance | 0.974 | | learning_rate | 0.0007 | | n_updates | 1199 | | policy_loss | 1.69 | | std | 0.958 | | value_loss | 0.448 | ------------------------------------ Reset@800, accumulated reward: -8.40, Interoception levels: energy:0.00, water:2.00 happiness: 0.00 Reset@900, accumulated reward: -7.95, Interoception levels: energy:4.00, water:0.00 happiness: 0.00 Reset@2001, accumulated reward: 87.81, Interoception levels: energy:7.99, water:10.99 happiness: 87.81 Reset@2001, accumulated reward: 87.81, Interoception levels: energy:10.99, water:7.99 happiness: 87.81 ------------------------------------ | time/ | | | fps | 318 | | iterations | 1600 | | time_elapsed | 100 | | total_timesteps | 32000 | | train/ | | | entropy_loss | -5.96 | | explained_variance | 0.994 | | learning_rate | 0.0007 | | n_updates | 1599 | | policy_loss | -1.27 | | std | 0.933 | | value_loss | 0.145 | ------------------------------------ Reset@2001, accumulated reward: 246.68, Interoception levels: energy:12.99, water:18.99 happiness: 246.68 Reset@2001, accumulated reward: 125.77, Interoception levels: energy:13.99, water:8.99 happiness: 125.77 Reset@2001, accumulated reward: 169.73, Interoception levels: energy:16.99, water:9.99 happiness: 169.73 Reset@2001, accumulated reward: 135.75, Interoception levels: energy:7.99, water:16.99 happiness: 135.75 
------------------------------------ | time/ | | | fps | 318 | | iterations | 2000 | | time_elapsed | 125 | | total_timesteps | 40000 | | train/ | | | entropy_loss | -5.79 | | explained_variance | 0.67 | | learning_rate | 0.0007 | | n_updates | 1999 | | policy_loss | 2.74 | | std | 0.892 | | value_loss | 4.92 | ------------------------------------ Reset@2001, accumulated reward: 129.77, Interoception levels: energy:9.99, water:12.99 happiness: 129.77 Reset@2001, accumulated reward: 237.69, Interoception levels: energy:16.99, water:13.99 happiness: 237.69 Reset@2001, accumulated reward: 104.78, Interoception levels: energy:6.99, water:14.99 happiness: 104.78 Reset@2001, accumulated reward: 259.67, Interoception levels: energy:12.99, water:19.99 happiness: 259.67 ------------------------------------ | time/ | | | fps | 321 | | iterations | 2400 | | time_elapsed | 149 | | total_timesteps | 48000 | | train/ | | | entropy_loss | -5.81 | | explained_variance | 0.984 | | learning_rate | 0.0007 | | n_updates | 2399 | | policy_loss | 2.8 | | std | 0.876 | | value_loss | 1.16 | ------------------------------------ Reset@2001, accumulated reward: 129.77, Interoception levels: energy:9.99, water:12.99 happiness: 129.77 Reset@1300, accumulated reward: -6.15, Interoception levels: energy:0.00, water:15.00 happiness: 0.00 Reset@2001, accumulated reward: 189.71, Interoception levels: energy:18.99, water:9.99 happiness: 189.71 Reset@2001, accumulated reward: 246.68, Interoception levels: energy:12.99, water:18.99 happiness: 246.68 ------------------------------------ | time/ | | | fps | 324 | | iterations | 2800 | | time_elapsed | 172 | | total_timesteps | 56000 | | train/ | | | entropy_loss | -5.38 | | explained_variance | 0.995 | | learning_rate | 0.0007 | | n_updates | 2799 | | policy_loss | -0.463 | | std | 0.85 | | value_loss | 0.259 | ------------------------------------ Reset@2001, accumulated reward: 259.67, Interoception levels: energy:12.99, water:19.99 happiness: 259.67 
Reset@2001, accumulated reward: 242.64, Interoception levels: energy:26.99, water:8.99 happiness: 242.64 Reset@2001, accumulated reward: 189.71, Interoception levels: energy:9.99, water:18.99 happiness: 189.71 Reset@700, accumulated reward: -8.85, Interoception levels: energy:5.00, water:0.00 happiness: 0.00 Reset@2001, accumulated reward: 55.85, Interoception levels: energy:7.99, water:6.99 happiness: 55.85 Reset@1200, accumulated reward: -6.60, Interoception levels: energy:17.00, water:0.00 happiness: 0.00 ------------------------------------ | time/ | | | fps | 326 | | iterations | 3200 | | time_elapsed | 196 | | total_timesteps | 64000 | | train/ | | | entropy_loss | -6.04 | | explained_variance | 0.963 | | learning_rate | 0.0007 | | n_updates | 3199 | | policy_loss | 3.95 | | std | 0.837 | | value_loss | 1.79 | ------------------------------------ Reset@800, accumulated reward: -8.40, Interoception levels: energy:0.00, water:1.00 happiness: 0.00 Reset@2001, accumulated reward: 219.69, Interoception levels: energy:19.99, water:10.99 happiness: 219.69 Reset@2001, accumulated reward: 168.74, Interoception levels: energy:12.99, water:12.99 happiness: 168.74 Reset@2001, accumulated reward: 11.87, Interoception levels: energy:11.99, water:0.99 happiness: 11.87 Reset@700, accumulated reward: -8.85, Interoception levels: energy:0.00, water:3.00 happiness: 0.00 Reset@800, accumulated reward: -8.40, Interoception levels: energy:0.00, water:3.00 happiness: 0.00 ------------------------------------ | time/ | | | fps | 324 | | iterations | 3600 | | time_elapsed | 222 | | total_timesteps | 72000 | | train/ | | | entropy_loss | -5.32 | | explained_variance | 0.982 | | learning_rate | 0.0007 | | n_updates | 3599 | | policy_loss | -0.816 | | std | 0.82 | | value_loss | 2.61 | ------------------------------------ Reset@2001, accumulated reward: 344.62, Interoception levels: energy:22.99, water:14.99 happiness: 344.62 Reset@2001, accumulated reward: 351.62, Interoception levels: 
energy:15.99, water:21.99 happiness: 351.62 Reset@1100, accumulated reward: -7.05, Interoception levels: energy:0.00, water:9.00 happiness: 0.00 Reset@2001, accumulated reward: 111.78, Interoception levels: energy:13.99, water:7.99 happiness: 111.78 ------------------------------------ | time/ | | | fps | 325 | | iterations | 4000 | | time_elapsed | 245 | | total_timesteps | 80000 | | train/ | | | entropy_loss | -5.61 | | explained_variance | 0.905 | | learning_rate | 0.0007 | | n_updates | 3999 | | policy_loss | 9.48 | | std | 0.807 | | value_loss | 18.3 | ------------------------------------ Reset@2001, accumulated reward: 367.61, Interoception levels: energy:15.99, water:22.99 happiness: 367.61 Reset@2001, accumulated reward: 99.80, Interoception levels: energy:9.99, water:9.99 happiness: 99.80 Reset@2001, accumulated reward: 239.68, Interoception levels: energy:11.99, water:19.99 happiness: 239.68 Reset@1000, accumulated reward: -7.50, Interoception levels: energy:9.00, water:0.00 happiness: 0.00 Reset@2001, accumulated reward: 299.65, Interoception levels: energy:19.99, water:14.99 happiness: 299.65 ------------------------------------ | time/ | | | fps | 326 | | iterations | 4400 | | time_elapsed | 269 | | total_timesteps | 88000 | | train/ | | | entropy_loss | -5.91 | | explained_variance | 0.986 | | learning_rate | 0.0007 | | n_updates | 4399 | | policy_loss | 3.84 | | std | 0.792 | | value_loss | 2.68 | ------------------------------------ Reset@2001, accumulated reward: 269.67, Interoception levels: energy:17.99, water:14.99 happiness: 269.67 Reset@2001, accumulated reward: 206.68, Interoception levels: energy:8.99, water:22.99 happiness: 206.68 Reset@1900, accumulated reward: -3.45, Interoception levels: energy:0.00, water:23.00 happiness: 0.00 Reset@900, accumulated reward: -7.95, Interoception levels: energy:0.00, water:4.00 happiness: 0.00 Reset@2001, accumulated reward: 399.60, Interoception levels: energy:19.99, water:19.99 happiness: 399.60 
------------------------------------ | time/ | | | fps | 327 | | iterations | 4800 | | time_elapsed | 293 | | total_timesteps | 96000 | | train/ | | | entropy_loss | -5.61 | | explained_variance | 0.99 | | learning_rate | 0.0007 | | n_updates | 4799 | | policy_loss | 0.361 | | std | 0.778 | | value_loss | 0.863 | ------------------------------------ Reset@600, accumulated reward: -9.30, Interoception levels: energy:5.00, water:0.00 happiness: 0.00 Reset@2001, accumulated reward: 377.61, Interoception levels: energy:17.99, water:20.99 happiness: 377.61 Reset@600, accumulated reward: -9.30, Interoception levels: energy:8.00, water:0.00 happiness: 0.00 Reset@1900, accumulated reward: -3.45, Interoception levels: energy:5.00, water:0.00 happiness: 0.00 Reset@2001, accumulated reward: 298.64, Interoception levels: energy:12.99, water:22.99 happiness: 298.64 ------------------------------------ | time/ | | | fps | 328 | | iterations | 5200 | | time_elapsed | 317 | | total_timesteps | 104000 | | train/ | | | entropy_loss | -5.91 | | explained_variance | 0.91 | | learning_rate | 0.0007 | | n_updates | 5199 | | policy_loss | -1.05 | | std | 0.771 | | value_loss | 12.7 | ------------------------------------ Reset@2001, accumulated reward: 207.71, Interoception levels: energy:12.99, water:15.99 happiness: 207.71 Reset@2001, accumulated reward: 298.64, Interoception levels: energy:22.99, water:12.99 happiness: 298.64 Reset@2001, accumulated reward: 179.73, Interoception levels: energy:11.99, water:14.99 happiness: 179.73 Reset@2001, accumulated reward: 377.61, Interoception levels: energy:17.99, water:20.99 happiness: 377.61 ------------------------------------ | time/ | | | fps | 327 | | iterations | 5600 | | time_elapsed | 341 | | total_timesteps | 112000 | | train/ | | | entropy_loss | -5.76 | | explained_variance | 0.994 | | learning_rate | 0.0007 | | n_updates | 5599 | | policy_loss | -0.465 | | std | 0.765 | | value_loss | 1.46 | ------------------------------------ 
Reset@2001, accumulated reward: 179.72, Interoception levels: energy:9.99, water:17.99 happiness: 179.72 Reset@2001, accumulated reward: 149.75, Interoception levels: energy:14.99, water:9.99 happiness: 149.75 Reset@2001, accumulated reward: 360.62, Interoception levels: energy:18.99, water:18.99 happiness: 360.62 Reset@2001, accumulated reward: 143.75, Interoception levels: energy:15.99, water:8.99 happiness: 143.75 ------------------------------------ | time/ | | | fps | 327 | | iterations | 6000 | | time_elapsed | 365 | | total_timesteps | 120000 | | train/ | | | entropy_loss | -5.59 | | explained_variance | 0.844 | | learning_rate | 0.0007 | | n_updates | 5999 | | policy_loss | 18.7 | | std | 0.751 | | value_loss | 10.8 | ------------------------------------ Reset@2001, accumulated reward: 143.75, Interoception levels: energy:15.99, water:8.99 happiness: 143.75 Reset@2001, accumulated reward: 485.55, Interoception levels: energy:26.99, water:17.99 happiness: 485.55
--------------------------------------------------------------------------- KeyboardInterrupt Traceback (most recent call last) <ipython-input-8-cb999a8c3175> in <module> 13 14 # Train the model ---> 15 model.learn(total_timesteps=2e5, log_interval=400) 16 # model.learn(total_timesteps=2e5) 17 ~/miniconda3/envs/gym/lib/python3.9/site-packages/stable_baselines3/a2c/a2c.py in learn(self, total_timesteps, callback, log_interval, eval_env, eval_freq, n_eval_episodes, tb_log_name, eval_log_path, reset_num_timesteps) 188 ) -> "A2C": 189 --> 190 return super(A2C, self).learn( 191 total_timesteps=total_timesteps, 192 callback=callback, ~/miniconda3/envs/gym/lib/python3.9/site-packages/stable_baselines3/common/on_policy_algorithm.py in learn(self, total_timesteps, callback, log_interval, eval_env, eval_freq, n_eval_episodes, tb_log_name, eval_log_path, reset_num_timesteps) 225 while self.num_timesteps < total_timesteps: 226 --> 227 continue_training = self.collect_rollouts(self.env, callback, self.rollout_buffer, n_rollout_steps=self.n_steps) 228 229 if continue_training is False: ~/miniconda3/envs/gym/lib/python3.9/site-packages/stable_baselines3/common/on_policy_algorithm.py in collect_rollouts(self, env, callback, rollout_buffer, n_rollout_steps) 166 clipped_actions = np.clip(actions, self.action_space.low, self.action_space.high) 167 --> 168 new_obs, rewards, dones, infos = env.step(clipped_actions) 169 170 self.num_timesteps += env.num_envs ~/miniconda3/envs/gym/lib/python3.9/site-packages/stable_baselines3/common/vec_env/base_vec_env.py in step(self, actions) 161 """ 162 self.step_async(actions) --> 163 return self.step_wait() 164 165 def get_images(self) -> Sequence[np.ndarray]: ~/miniconda3/envs/gym/lib/python3.9/site-packages/stable_baselines3/common/vec_env/dummy_vec_env.py in step_wait(self) 41 def step_wait(self) -> VecEnvStepReturn: 42 for env_idx in range(self.num_envs): ---> 43 obs, self.buf_rews[env_idx], self.buf_dones[env_idx], self.buf_infos[env_idx] = 
self.envs[env_idx].step( 44 self.actions[env_idx] 45 ) /w10/Users/niklas/Documents/AI AGI DT/ecotwin/gh_repo/ecotwins/ecotwins/ecoenv.py in step(self, action) 75 reward = self._reward() 76 self.total_reward += reward ---> 77 obs = self._next_observation() 78 done = self.ecosystem.is_done() 79 /w10/Users/niklas/Documents/AI AGI DT/ecotwin/gh_repo/ecotwins/ecotwins/ecoenv.py in _next_observation(self) 38 39 def _next_observation(self): ---> 40 return self.agent.observation(self.ecosystem.terrain) 41 42 # Helper function to step. /w10/Users/niklas/Documents/AI AGI DT/ecotwin/gh_repo/ecotwins/ecotwins/animal_classes.py in observation(self, terrain) 596 def observation(self, terrain: Terrain): 597 if self.n_frames == 1: --> 598 return self._one_observation_frame(terrain) 599 elif self.observation_frame is None: 600 # Initialize observation_frame /w10/Users/niklas/Documents/AI AGI DT/ecotwin/gh_repo/ecotwins/ecotwins/animal_classes.py in _one_observation_frame(self, terrain) 610 if self.use_multi_direction: 611 for name, p in self.perception.items(): --> 612 o.append(p.multi_direction(self.position, terrain.objects[name])) 613 # print('multi', o[-1].shape) 614 if self.use_single_direction: /w10/Users/niklas/Documents/AI AGI DT/ecotwin/gh_repo/ecotwins/ecotwins/perception.py in multi_direction(self, position, objects) 88 # Possible input to the policy network 89 def multi_direction(self, position, objects): ---> 90 signals = self.stimulus(distance(position, objects)).reshape(-1, 1) 91 vector_sum = (signals * normalize(objects - position)).sum( 92 axis=0, keepdims=True /w10/Users/niklas/Documents/AI AGI DT/ecotwin/gh_repo/ecotwins/ecotwins/perception.py in stimulus(self, d, radius) 51 # epsilon = np.sqrt(1e-1) 52 d = np.maximum(d - self.reward_radius, 0) ---> 53 v = 1 / (self.epsilon + d) ** 2 54 v[d > radius] = 0 55 if self.use_clip: KeyboardInterrupt:
%%capture
# Animate the trained world4 policy over a full episode; hide consumed
# objects for a cleaner picture.
env = world4()
a = AnimationHelper(env, model)
a.init_animation(show_consumed=False);
anim = animation.FuncAnimation(a.fig, a.drawframe, frames=2000, interval=50, blit=True)
HTML(anim.to_html5_video())
Done @1998, 2001, 0 Reset@2001, accumulated reward: 84.78, Interoception levels: energy:16.99, water:4.99 happiness: 84.78
# anim.save('animation.mp4', writer = FFwriter)
# anim = animation.FuncAnimation(a.fig, a.drawframe, frames=200, interval=50, blit=True)
# Export the animation as H.264 mp4; yuv420p keeps it playable in browsers.
anim.save('multi_sheep_20211213-with-happiness-obs.mp4',
          fps=30,
          extra_args=['-vcodec', 'h264', '-pix_fmt', 'yuv420p']
          )
def world6():
    """Build an EcoEnv with a MultiSheep agent that has three needs.

    Compared to world4 the terrain also contains grass, the only source of
    protein, and the world is bounded rather than wrapping (torus=False).
    """
    # Counts can be replaced by explicit coordinate arrays (or larger counts
    # such as 100/200 per object) to change the layout density.
    terrain = Terrain(
        objects={"dandelion": 20, "water": 20, "grass": 20},
        torus=False,
    )

    sheep = MultiSheep(
        hyperparameters={"max_age": 2000, "delta": 0.1, "close": .5, "gamma": 0.9},
        # Perception range per object type (28 was used in earlier runs).
        distances={"dandelion": 5, "water": 5, "grass": 5},
        interoception={"energy": 3, "water": 3, "protein": 3},
        use_interoception_as_obs=True,
        use_intensity_as_obs=True,
        use_single_direction=True,
        use_multi_direction=True,
        use_happiness_as_obs=True,
        use_reward_radius_in_perception=True,
        normalize_action=False,
        # Nutrition table: each consumable refills exactly one need.
        homeostatic_effects={
            ("move", None): {"energy": -0.01, "water": -0.01, "protein": -0.01},
            ("consume", "water"): {"energy": 0, "water": 1, "protein": 0},
            ("consume", "dandelion"): {"energy": 1, "water": 0, "protein": 0},
            ("consume", "grass"): {"energy": 0, "water": 0, "protein": 1},
        },
        n_frames=1,
    )
    return EcoEnv(Ecosystem(terrain, sheep))
# Train (or continue training) A2C on world6.
# NOTE(review): extend_training=True assumes `model` already exists from an
# earlier cell; running this cell first would raise NameError — confirm the
# intended execution order.
extend_training = True
env = world6()
if not extend_training:
    # Instantiate the agent
    device='cpu'
    model = A2C('MlpPolicy', env, verbose=1, use_sde=True, device=device)
else:
    # Keep the previously trained weights, just swap in the new env.
    model.set_env(env)
# model = PPO('MlpPolicy', env, verbose=1)
# Train the agent
model.learn(total_timesteps=2e5, log_interval=400)
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print(f'Mean reward: {mean_reward}, std. dev.: {std_reward}')
# Save the model
# model.save("multi_sheep_with_normalization.pth")
# Load the model
# model = PPO.load("multi_sheep_with_normalization.pth")
%%capture
# Animate the trained world6 policy over a full episode; hide consumed
# objects for a cleaner picture.
env = world6()
a = AnimationHelper(env, model)
a.init_animation(show_consumed=False);
anim = animation.FuncAnimation(a.fig, a.drawframe, frames=2000, interval=50, blit=True)
HTML(anim.to_html5_video())
# NOTE(review): FFwriter is created but never passed to anim.save below, so
# it is unused unless the commented-out call is restored.
FFwriter = animation.FFMpegWriter(fps=60)
# anim.save('3needs.mp4', writer = FFwriter)
# Export as H.264 mp4; yuv420p keeps it playable in browsers.
anim.save('3needs.mp4',
          fps=30,
          extra_args=['-vcodec', 'h264', '-pix_fmt', 'yuv420p']
          )
%%capture
# Baseline comparison: hand-coded ReflexAgent with 3 directions in world6.
env = world6()
m = ReflexAgent(env, 3)
a = AnimationHelper(env, m)
a.init_animation(show_consumed=False);
anim = animation.FuncAnimation(a.fig, a.drawframe, frames=200, interval=50, blit=True)
HTML(anim.to_html5_video())
def thorny_world():
    """Build an EcoEnv where consuming the wrong object is harmful.

    The terrain mixes dandelions (food) with thorns. The agent has a single
    need (energy); eating a thorn costs five times a dandelion's gain, so the
    policy must learn to discriminate between the two object types.
    """
    terrain = Terrain(objects={"dandelion": 20, "thorn": 20})

    # Need: only energy; object perception covers dandelion and thorn.
    sheep = MultiSheep(
        hyperparameters={"max_age": 2000, "delta": 0.1, "close": .25},
        # Thorns are perceived at a shorter range than dandelions.
        distances={"dandelion": 10, "thorn": 5},
        interoception={"energy": 3},
        use_interoception_as_obs=True,
        use_intensity_as_obs=True,
        use_single_direction=True,
        use_multi_direction=True,
        use_reward_radius_in_perception=True,
        use_happiness_as_obs=True,
        normalize_action=False,
        # Nutrition table: thorns hurt, dandelions feed.
        homeostatic_effects={
            ("move", None): {"energy": -0.01},
            ("consume", "thorn"): {"energy": -5},
            ("consume", "dandelion"): {"energy": 1},
        },
    )
    return EcoEnv(Ecosystem(terrain, sheep))
# Train A2C on the thorny world with TensorBoard logging under ./tb_logs.
env = thorny_world()
# Create the model
# model = PPO('MlpPolicy', env, verbose=1, use_sde=True, tensorboard_log='./tb_logs')
model = A2C('MlpPolicy', env, verbose=1, use_sde=True, tensorboard_log='./tb_logs', device='cuda')
# model = A2C('MlpPolicy', env, verbose=1, use_sde=True, tensorboard_log='./tb_logs', device='cpu')
# Train the model
model.learn(total_timesteps=2e5, log_interval=400, eval_freq=10000)
# Evaluate the model
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print(f'Mean reward: {mean_reward:.2f}, std. dev.: {std_reward:.2f}')
# Save the model
# model.save("multi_sheep_champion.pth")
# Load the model
# model = PPO.load("multi_sheep_champion.pth")
%%capture
# Perhaps a somewhat unclean way of suppressing the cell output.
# Animate the trained policy in a fresh thorny world over a full episode.
env = thorny_world()
a = AnimationHelper(env, model)
a.init_animation(show_consumed=False);
anim = animation.FuncAnimation(a.fig, a.drawframe, frames=2000, interval=50, blit=True)
HTML(anim.to_html5_video())
import types
# The modified happiness function
def h(agent, t=None):
    """Modified happiness: the agent's energy level plus its p_happiness.

    The unused `t` parameter keeps the signature compatible with the
    happiness-function interface this replaces.
    """
    energy = agent.interoception['energy']
    return energy + agent.p_happiness
env = thorny_world()
# Monkey-patch this agent instance: zero out the initial p_happiness term and
# replace the happiness function with h defined above (energy + p_happiness).
env.agent._init_p_happiness = types.MethodType(lambda x: 0.0, env.agent)
env.agent.happiness = types.MethodType(h, env.agent)
model = A2C('MlpPolicy', env, verbose=1, use_sde=True, tensorboard_log='./tb_logs', device='cpu')
# Train the model (only 300 steps — a quick smoke test of the patched reward).
model.learn(total_timesteps=300, log_interval=400, eval_freq=10000)
# Evaluate the model
mean_reward, std_reward = evaluate_policy(model, model.get_env(), n_eval_episodes=10)
print(f'Mean reward: {mean_reward:.2f}, std. dev.: {std_reward:.2f}')
More sheep worlds
New animals
More details in animal_classes.
class AnimationHelper[source]
AnimationHelper(env,model)
from nbdev.export import notebook2script; notebook2script()